Merge branch 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6 * 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6: sis900 warning fixes mv643xx_eth: Place explicit port number in mv643xx_eth_platform_data pcnet32: Fix PCnet32 performance bug on non-coherent architecutres __devinit & __devexit cleanups for de2104x driver 3c59x: Handle pci_enable_device() failure while resuming dmfe: Fix link detection dmfe: fix two bugs dmfe: trivial/spelling fixes revert "drivers/net/tulip/dmfe: support basic carrier detection" ucc_geth: returns NETDEV_TX_BUSY when BD ring is full ucc_geth: Fix BD processing natsemi: netpoll fixes bonding: Improve IGMP join processing bonding: only receive ARPs for us bonding: fix double dev_add_pack

commit: 205c911da322908abe127b96d2ef2a4a2aa5109a [log] [tgz]
author: Linus Torvalds <torvalds@woody.linux-foundation.org> Tue Mar 06 17:30:59 2007 -0800
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> Tue Mar 06 17:30:59 2007 -0800
tree: 43f6878c6a255965a4061d05da68dbc1df9e0ca4
parent: c7276fde27bca89798f33c0be9543dc108468788 [diff]
parent: f3be97427172856d6865ddfedea84fa3a9f33227 [diff]
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 03eb5ed..6e92ba6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt

@@ -1685,6 +1685,22 @@
 	stifb=		[HW]
 			Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
 
+	sunrpc.pool_mode=
+			[NFS]
+			Control how the NFS server code allocates CPUs to
+			service thread pools.  Depending on how many NICs
+			you have and where their interrupts are bound, this
+			option will affect which CPUs will do NFS serving.
+			Note: this parameter cannot be changed while the
+			NFS server is running.
+
+			auto	    the server chooses an appropriate mode
+				    automatically using heuristics
+			global	    a single global pool contains all CPUs
+			percpu	    one pool for each CPU
+			pernode	    one pool for each NUMA node (equivalent
+				    to global on non-NUMA machines)
+
 	swiotlb=	[IA-64] Number of I/O TLB slabs
 
 	switches=	[HW,M68k]

diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 875d8a6..602660d 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c

@@ -24,7 +24,6 @@
  * an extra value to store the TSC freq
  */
 unsigned int tsc_khz;
-unsigned long long (*custom_sched_clock)(void);
 
 int tsc_disable;
 

diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
index 8dc72d5..9dfb177 100644
--- a/arch/i386/kernel/vmitime.c
+++ b/arch/i386/kernel/vmitime.c

@@ -123,12 +123,10 @@
 static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
 
 static struct irqaction vmi_timer_irq  = {
-	vmi_timer_interrupt,
-	SA_INTERRUPT,
-	CPU_MASK_NONE,
-	"VMI-alarm",
-	NULL,
-	NULL
+	.handler = vmi_timer_interrupt,
+	.flags = IRQF_DISABLED,
+	.mask = CPU_MASK_NONE,
+	.name = "VMI-alarm",
 };
 
 /* Alarm rate */

diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 2a32e5e..3c798cd 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c

@@ -158,12 +158,12 @@
 		clear_thread_flag(TIF_RESTORE_SIGMASK);
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 	}
-	return(handled_sig);
+	return handled_sig;
 }
 
 int do_signal(void)
 {
-	return(kern_do_signal(&current->thread.regs));
+	return kern_do_signal(&current->thread.regs);
 }
 
 /*
@@ -186,5 +186,5 @@
 
 long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
 {
-	return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs)));
+	return do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs));
 }

diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 9b34fe6..dda0678 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c

@@ -419,9 +419,12 @@
 					  .offset  = code_offset
 	} } });
 	n = os_write_file(fd, &mmop, sizeof(mmop));
-	if(n != sizeof(mmop))
+	if(n != sizeof(mmop)){
+		printk("mmap args - addr = 0x%lx, fd = %d, offset = %llx\n",
+		       code, code_fd, (unsigned long long) code_offset);
 		panic("map_stub_pages : /proc/mm map for code failed, "
 		      "err = %d\n", -n);
+	}
 
 	if ( stack ) {
 		__u64 map_offset;

diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c
index 1df231a..d221214 100644
--- a/arch/um/os-Linux/trap.c
+++ b/arch/um/os-Linux/trap.c

@@ -16,6 +16,7 @@
 	CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0);
 }
 
+/* Initialized from linux_main() */
 void (*sig_info[NSIG])(int, union uml_pt_regs *);
 
 void os_fill_handlinfo(struct kern_handlers h)

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index cacb1c8..17ee97f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig

@@ -406,22 +406,6 @@
 	  setups function - apparently needed by the rd_load_image routine
 	  that supposes the filesystem in the image uses a 1024 blocksize.
 
-config BLK_DEV_INITRD
-	bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
-	depends on BROKEN || !FRV
-	help
-	  The initial RAM filesystem is a ramfs which is loaded by the
-	  boot loader (loadlin or lilo) and that is mounted as root
-	  before the normal boot procedure. It is typically used to
-	  load modules needed to mount the "real" root file system,
-	  etc. See <file:Documentation/initrd.txt> for details.
-
-	  If RAM disk support (BLK_DEV_RAM) is also included, this
-	  also enables initial RAM disk (initrd) support and adds
-	  15 Kbytes (more on some other architectures) to the kernel size.
-
-	  If unsure say Y.
-
 config CDROM_PKTCDVD
 	tristate "Packet writing on CD/DVD media"
 	depends on !UML

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 05dfe35..0c716ee 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c

@@ -1291,13 +1291,19 @@
 	if (inq_buff == NULL)
 		goto mem_msg;
 
+ 	/* testing to see if 16-byte CDBs are already being used */
+ 	if (h->cciss_read == CCISS_READ_16) {
+ 		cciss_read_capacity_16(h->ctlr, drv_index, 1,
+ 			&total_size, &block_size);
+ 		goto geo_inq;
+ 	}
+
 	cciss_read_capacity(ctlr, drv_index, 1,
 			    &total_size, &block_size);
 
-	/* total size = last LBA + 1 */
-	/* FFFFFFFF + 1 = 0, cannot have a logical volume of size 0 */
-	/* so we assume this volume this must be >2TB in size */
-	if (total_size == (__u32) 0) {
+  	/* if read_capacity returns all F's this volume is >2TB in size */
+  	/* so we switch to 16-byte CDB's for all read/write ops */
+  	if (total_size == 0xFFFFFFFFULL) {
 		cciss_read_capacity_16(ctlr, drv_index, 1,
 		&total_size, &block_size);
 		h->cciss_read = CCISS_READ_16;
@@ -1306,6 +1312,7 @@
 		h->cciss_read = CCISS_READ_10;
 		h->cciss_write = CCISS_WRITE_10;
 	}
+geo_inq:
 	cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
 			       inq_buff, &h->drv[drv_index]);
 
@@ -1917,13 +1924,14 @@
 			drv->raid_level = inq_buff->data_byte[8];
 		}
 		drv->block_size = block_size;
-		drv->nr_blocks = total_size;
+		drv->nr_blocks = total_size + 1;
 		t = drv->heads * drv->sectors;
 		if (t > 1) {
-			unsigned rem = sector_div(total_size, t);
+			sector_t real_size = total_size + 1;
+			unsigned long rem = sector_div(real_size, t);
 			if (rem)
-				total_size++;
-			drv->cylinders = total_size;
+				real_size++;
+			drv->cylinders = real_size;
 		}
 	} else {		/* Get geometry failed */
 		printk(KERN_WARNING "cciss: reading geometry failed\n");
@@ -1953,16 +1961,16 @@
 				ctlr, buf, sizeof(ReadCapdata_struct),
 					1, logvol, 0, NULL, TYPE_CMD);
 	if (return_code == IO_OK) {
-		*total_size = be32_to_cpu(*(__u32 *) buf->total_size)+1;
+		*total_size = be32_to_cpu(*(__u32 *) buf->total_size);
 		*block_size = be32_to_cpu(*(__u32 *) buf->block_size);
 	} else {		/* read capacity command failed */
 		printk(KERN_WARNING "cciss: read capacity failed\n");
 		*total_size = 0;
 		*block_size = BLOCK_SIZE;
 	}
-	if (*total_size != (__u32) 0)
+	if (*total_size != 0)
 		printk(KERN_INFO "      blocks= %llu block_size= %d\n",
-		(unsigned long long)*total_size, *block_size);
+		(unsigned long long)*total_size+1, *block_size);
 	kfree(buf);
 	return;
 }
@@ -1989,7 +1997,7 @@
 				1, logvol, 0, NULL, TYPE_CMD);
 	}
 	if (return_code == IO_OK) {
-		*total_size = be64_to_cpu(*(__u64 *) buf->total_size)+1;
+		*total_size = be64_to_cpu(*(__u64 *) buf->total_size);
 		*block_size = be32_to_cpu(*(__u32 *) buf->block_size);
 	} else {		/* read capacity command failed */
 		printk(KERN_WARNING "cciss: read capacity failed\n");
@@ -1997,7 +2005,7 @@
 		*block_size = BLOCK_SIZE;
 	}
 	printk(KERN_INFO "      blocks= %llu block_size= %d\n",
-	       (unsigned long long)*total_size, *block_size);
+	       (unsigned long long)*total_size+1, *block_size);
 	kfree(buf);
 	return;
 }
@@ -3119,8 +3127,9 @@
 		}
 		cciss_read_capacity(cntl_num, i, 0, &total_size, &block_size);
 
-		/* total_size = last LBA + 1 */
-		if(total_size == (__u32) 0) {
+		/* If read_capacity returns all F's the logical is >2TB */
+		/* so we switch to 16-byte CDBs for all read/write ops */
+		if(total_size == 0xFFFFFFFFULL) {
 			cciss_read_capacity_16(cntl_num, i, 0,
 			&total_size, &block_size);
 			hba[cntl_num]->cciss_read = CCISS_READ_16;
@@ -3395,7 +3404,7 @@
 	return -1;
 }
 
-static void __devexit cciss_remove_one(struct pci_dev *pdev)
+static void cciss_remove_one(struct pci_dev *pdev)
 {
 	ctlr_info_t *tmp_ptr;
 	int i, j;
@@ -3419,9 +3428,10 @@
 	memset(flush_buf, 0, 4);
 	return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
 			      TYPE_CMD);
-	if (return_code != IO_OK) {
-		printk(KERN_WARNING "Error Flushing cache on controller %d\n",
-		       i);
+	if (return_code == IO_OK) {
+		printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
+	} else {
+		printk(KERN_WARNING "Error flushing cache on controller %d\n", i);
 	}
 	free_irq(hba[i]->intr[2], hba[i]);
 
@@ -3472,6 +3482,7 @@
 	.probe = cciss_init_one,
 	.remove = __devexit_p(cciss_remove_one),
 	.id_table = cciss_pci_device_id,	/* id_table */
+	.shutdown = cciss_remove_one,
 };
 
 /*

diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c
index 0eb6284..6d3840e 100644
--- a/drivers/crypto/geode-aes.c
+++ b/drivers/crypto/geode-aes.c

@@ -99,9 +99,8 @@
 static unsigned int
 geode_aes_crypt(struct geode_aes_op *op)
 {
-
 	u32 flags = 0;
-	int iflags;
+	unsigned long iflags;
 
 	if (op->len == 0 || op->src == op->dst)
 		return 0;

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index b8f0a11..7f5a598 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig

@@ -677,8 +677,6 @@
 config FB_NVIDIA
 	tristate "nVidia Framebuffer Support"
 	depends on FB && PCI
-	select I2C_ALGOBIT if FB_NVIDIA_I2C
-	select I2C if FB_NVIDIA_I2C
 	select FB_BACKLIGHT if FB_NVIDIA_BACKLIGHT
 	select FB_MODE_HELPERS
 	select FB_CFB_FILLRECT
@@ -697,6 +695,7 @@
 config FB_NVIDIA_I2C
        bool "Enable DDC Support"
        depends on FB_NVIDIA
+       select FB_DDC
        help
 	  This enables I2C support for nVidia Chipsets.  This is used
 	  only for getting EDID information from the attached display
@@ -716,7 +715,6 @@
 config FB_RIVA
 	tristate "nVidia Riva support"
 	depends on FB && PCI
-	select FB_DDC if FB_RIVA_I2C
 	select FB_BACKLIGHT if FB_RIVA_BACKLIGHT
 	select FB_MODE_HELPERS
 	select FB_CFB_FILLRECT
@@ -734,6 +732,7 @@
 config FB_RIVA_I2C
        bool "Enable DDC Support"
        depends on FB_RIVA
+       select FB_DDC
        help
 	  This enables I2C support for nVidia Chipsets.  This is used
 	  only for getting EDID information from the attached display
@@ -812,8 +811,6 @@
 	depends on FB && EXPERIMENTAL && PCI && X86
 	select AGP
 	select AGP_INTEL
-	select I2C_ALGOBIT if FB_INTEL_I2C
-	select I2C if FB_INTEL_I2C
 	select FB_MODE_HELPERS
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
@@ -846,6 +843,7 @@
 config FB_INTEL_I2C
 	bool "DDC/I2C for Intel framebuffer support"
 	depends on FB_INTEL
+	select FB_DDC
 	default y
 	help
 	  Say Y here if you want DDC/I2C support for your on-board Intel graphics.
@@ -924,8 +922,8 @@
 
 config FB_MATROX_I2C
 	tristate "Matrox I2C support"
-	depends on FB_MATROX && I2C
-	select I2C_ALGOBIT
+	depends on FB_MATROX
+	select FB_DDC
 	---help---
 	  This drivers creates I2C buses which are needed for accessing the
 	  DDC (I2C) bus present on all Matroxes, an I2C bus which
@@ -993,7 +991,6 @@
 config FB_RADEON
 	tristate "ATI Radeon display support"
 	depends on FB && PCI
-	select FB_DDC if FB_RADEON_I2C
 	select FB_BACKLIGHT if FB_RADEON_BACKLIGHT
 	select FB_MODE_HELPERS
 	select FB_CFB_FILLRECT
@@ -1018,6 +1015,7 @@
 config FB_RADEON_I2C
 	bool "DDC/I2C for ATI Radeon support"
 	depends on FB_RADEON
+	select FB_DDC
 	default y
 	help
 	  Say Y here if you want DDC/I2C support for your Radeon board. 
@@ -1125,7 +1123,6 @@
 config FB_SAVAGE
 	tristate "S3 Savage support"
 	depends on FB && PCI && EXPERIMENTAL
-	select FB_DDC if FB_SAVAGE_I2C
 	select FB_MODE_HELPERS
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
@@ -1142,6 +1139,7 @@
 config FB_SAVAGE_I2C
        bool "Enable DDC2 Support"
        depends on FB_SAVAGE
+       select FB_DDC
        help
 	  This enables I2C support for S3 Savage Chipsets.  This is used
 	  only for getting EDID information from the attached display

diff --git a/drivers/video/aty/atyfb.h b/drivers/video/aty/atyfb.h
index f72faff..dc62f8e 100644
--- a/drivers/video/aty/atyfb.h
+++ b/drivers/video/aty/atyfb.h

@@ -284,7 +284,8 @@
 #endif
 }
 
-#if defined(CONFIG_PM) || defined(CONFIG_PMAC_BACKLIGHT) || defined (CONFIG_FB_ATY_GENERIC_LCD)
+#if defined(CONFIG_PM) || defined(CONFIG_PMAC_BACKLIGHT) || \
+defined (CONFIG_FB_ATY_GENERIC_LCD) || defined (CONFIG_FB_ATY_BACKLIGHT)
 extern void aty_st_lcd(int index, u32 val, const struct atyfb_par *par);
 extern u32 aty_ld_lcd(int index, const struct atyfb_par *par);
 #endif

diff --git a/drivers/video/nvidia/nv_backlight.c b/drivers/video/nvidia/nv_backlight.c
index a50b303..43f62d8 100644
--- a/drivers/video/nvidia/nv_backlight.c
+++ b/drivers/video/nvidia/nv_backlight.c

@@ -12,6 +12,11 @@
 #include <linux/backlight.h>
 #include <linux/fb.h>
 #include <linux/pci.h>
+
+#ifdef CONFIG_PMAC_BACKLIGHT
+#include <asm/backlight.h>
+#endif
+
 #include "nv_local.h"
 #include "nv_type.h"
 #include "nv_proto.h"

diff --git a/fs/buffer.c b/fs/buffer.c
index e8504b6..1d0852f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c

@@ -2365,6 +2365,10 @@
 }
 EXPORT_SYMBOL(nobh_prepare_write);
 
+/*
+ * Make sure any changes to nobh_commit_write() are reflected in
+ * nobh_truncate_page(), since it doesn't call commit_write().
+ */
 int nobh_commit_write(struct file *file, struct page *page,
 		unsigned from, unsigned to)
 {
@@ -2466,6 +2470,11 @@
 		memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
 		flush_dcache_page(page);
 		kunmap_atomic(kaddr, KM_USER0);
+		/*
+		 * It would be more correct to call aops->commit_write()
+		 * here, but this is more efficient.
+		 */
+		SetPageUptodate(page);
 		set_page_dirty(page);
 	}
 	unlock_page(page);

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8813990..85a6686 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c

@@ -431,6 +431,8 @@
 	new_parent_dentry = new_parent ?
 		new_parent->dentry : sysfs_mount->mnt_sb->s_root;
 
+	if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
+		return 0;	/* nothing to move */
 again:
 	mutex_lock(&old_parent_dentry->d_inode->i_mutex);
 	if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {

diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h
index e997891..84016ff 100644
--- a/include/asm-i386/tsc.h
+++ b/include/asm-i386/tsc.h

@@ -1 +1,67 @@
-#include <asm-x86_64/tsc.h>
+/*
+ * linux/include/asm-i386/tsc.h
+ *
+ * i386 TSC related functions
+ */
+#ifndef _ASM_i386_TSC_H
+#define _ASM_i386_TSC_H
+
+#include <asm/processor.h>
+
+/*
+ * Standard way to access the cycle counter.
+ */
+typedef unsigned long long cycles_t;
+
+extern unsigned int cpu_khz;
+extern unsigned int tsc_khz;
+
+static inline cycles_t get_cycles(void)
+{
+	unsigned long long ret = 0;
+
+#ifndef CONFIG_X86_TSC
+	if (!cpu_has_tsc)
+		return 0;
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+	rdtscll(ret);
+#endif
+	return ret;
+}
+
+/* Like get_cycles, but make sure the CPU is synchronized. */
+static __always_inline cycles_t get_cycles_sync(void)
+{
+	unsigned long long ret;
+#ifdef X86_FEATURE_SYNC_RDTSC
+	unsigned eax;
+
+	/*
+	 * Don't do an additional sync on CPUs where we know
+	 * RDTSC is already synchronous:
+	 */
+	alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
+			  "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
+#else
+	sync_core();
+#endif
+	rdtscll(ret);
+
+	return ret;
+}
+
+extern void tsc_init(void);
+extern void mark_tsc_unstable(void);
+extern int unsynchronized_tsc(void);
+extern void init_tsc_clocksource(void);
+
+/*
+ * Boot-time check whether the TSCs are synchronized across
+ * all CPUs/cores:
+ */
+extern void check_tsc_sync_source(int cpu);
+extern void check_tsc_sync_target(void);
+
+#endif

diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h
index 1f971eb..94d0a12 100644
--- a/include/asm-i386/vmi_time.h
+++ b/include/asm-i386/vmi_time.h

@@ -61,6 +61,14 @@
 #ifdef CONFIG_NO_IDLE_HZ
 extern int vmi_stop_hz_timer(void);
 extern void vmi_account_time_restart_hz_timer(void);
+#else
+static inline int vmi_stop_hz_timer(void)
+{
+	return 0;
+}
+static inline void vmi_account_time_restart_hz_timer(void)
+{
+}
 #endif
 
 /*

diff --git a/include/asm-x86_64/tsc.h b/include/asm-x86_64/tsc.h
index 26c3e98..d66ba6e 100644
--- a/include/asm-x86_64/tsc.h
+++ b/include/asm-x86_64/tsc.h

@@ -1,67 +1 @@
-/*
- * linux/include/asm-x86_64/tsc.h
- *
- * x86_64 TSC related functions
- */
-#ifndef _ASM_x86_64_TSC_H
-#define _ASM_x86_64_TSC_H
-
-#include <asm/processor.h>
-
-/*
- * Standard way to access the cycle counter.
- */
-typedef unsigned long long cycles_t;
-
-extern unsigned int cpu_khz;
-extern unsigned int tsc_khz;
-
-static inline cycles_t get_cycles(void)
-{
-	unsigned long long ret = 0;
-
-#ifndef CONFIG_X86_TSC
-	if (!cpu_has_tsc)
-		return 0;
-#endif
-
-#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
-	rdtscll(ret);
-#endif
-	return ret;
-}
-
-/* Like get_cycles, but make sure the CPU is synchronized. */
-static __always_inline cycles_t get_cycles_sync(void)
-{
-	unsigned long long ret;
-#ifdef X86_FEATURE_SYNC_RDTSC
-	unsigned eax;
-
-	/*
-	 * Don't do an additional sync on CPUs where we know
-	 * RDTSC is already synchronous:
-	 */
-	alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
-			  "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
-#else
-	sync_core();
-#endif
-	rdtscll(ret);
-
-	return ret;
-}
-
-extern void tsc_init(void);
-extern void mark_tsc_unstable(void);
-extern int unsynchronized_tsc(void);
-extern void init_tsc_clocksource(void);
-
-/*
- * Boot-time check whether the TSCs are synchronized across
- * all CPUs/cores:
- */
-extern void check_tsc_sync_source(int cpu);
-extern void check_tsc_sync_target(void);
-
-#endif
+#include <asm-i386/tsc.h>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 229fa01..773e30d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h

@@ -24,6 +24,7 @@
 #ifndef _LINUX_AUDIT_H_
 #define _LINUX_AUDIT_H_
 
+#include <linux/types.h>
 #include <linux/elf-em.h>
 
 /* The netlink messages for the audit system is divided into blocks:

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3bef961..5bdbc74 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h

@@ -47,7 +47,7 @@
  *	HRTIMER_CB_IRQSAFE:		Callback may run in hardirq context
  *	HRTIMER_CB_IRQSAFE_NO_RESTART:	Callback may run in hardirq context and
  *					does not restart the timer
- *	HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:	Callback must run in softirq context
+ *	HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:	Callback must run in hardirq context
  *					Special mode for tick emultation
  */
 enum hrtimer_cb_mode {
@@ -139,7 +139,7 @@
 };
 
 /**
- * struct hrtimer_base - the timer base for a specific clock
+ * struct hrtimer_clock_base - the timer base for a specific clock
  * @cpu_base:		per cpu clock base
  * @index:		clock type index for per_cpu support when moving a
  *			timer to a base on another cpu.

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 83b3c7b..35fa4d5 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h

@@ -194,9 +194,7 @@
 
 union svc_addr_u {
     struct in_addr	addr;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
     struct in6_addr	addr6;
-#endif
 };
 
 /*

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index cccea0a..7909687 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h

@@ -66,7 +66,7 @@
  * Function prototypes.
  */
 int		svc_makesock(struct svc_serv *, int, unsigned short, int flags);
-void		svc_close_socket(struct svc_sock *);
+void		svc_force_close_socket(struct svc_sock *);
 int		svc_recv(struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);

diff --git a/init/Kconfig b/init/Kconfig
index f977086..b170aa1 100644
--- a/init/Kconfig
+++ b/init/Kconfig

@@ -304,6 +304,22 @@
 
 	  If unsure, say N.
 
+config BLK_DEV_INITRD
+	bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
+	depends on BROKEN || !FRV
+	help
+	  The initial RAM filesystem is a ramfs which is loaded by the
+	  boot loader (loadlin or lilo) and that is mounted as root
+	  before the normal boot procedure. It is typically used to
+	  load modules needed to mount the "real" root file system,
+	  etc. See <file:Documentation/initrd.txt> for details.
+
+	  If RAM disk support (BLK_DEV_RAM) is also included, this
+	  also enables initial RAM disk (initrd) support and adds
+	  15 Kbytes (more on some other architectures) to the kernel size.
+
+	  If unsure say Y.
+
 if BLK_DEV_INITRD
 
 source "usr/Kconfig"

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 0b5ecbe..554ac36 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c

@@ -731,7 +731,8 @@
 	if (IS_ERR(name))
 		return PTR_ERR(name);
 
-	mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+	mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex,
+			I_MUTEX_PARENT);
 	dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index de93a81..ec4cb9f 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c

@@ -540,19 +540,19 @@
 /*
  * Switch to high resolution mode
  */
-static void hrtimer_switch_to_hres(void)
+static int hrtimer_switch_to_hres(void)
 {
 	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
 	unsigned long flags;
 
 	if (base->hres_active)
-		return;
+		return 1;
 
 	local_irq_save(flags);
 
 	if (tick_init_highres()) {
 		local_irq_restore(flags);
-		return;
+		return 0;
 	}
 	base->hres_active = 1;
 	base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
@@ -565,13 +565,14 @@
 	local_irq_restore(flags);
 	printk(KERN_INFO "Switched to high resolution mode on CPU %d\n",
 	       smp_processor_id());
+	return 1;
 }
 
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
 static inline int hrtimer_is_hres_enabled(void) { return 0; }
-static inline void hrtimer_switch_to_hres(void) { }
+static inline int hrtimer_switch_to_hres(void) { return 0; }
 static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 					    struct hrtimer_clock_base *base)
@@ -1130,6 +1131,9 @@
 		if (base->softirq_time.tv64 <= timer->expires.tv64)
 			break;
 
+#ifdef CONFIG_HIGH_RES_TIMERS
+		WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
+#endif
 		timer_stats_account_hrtimer(timer);
 
 		fn = timer->function;
@@ -1173,7 +1177,8 @@
 	 * deadlock vs. xtime_lock.
 	 */
 	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
-		hrtimer_switch_to_hres();
+		if (hrtimer_switch_to_hres())
+			return;
 
 	hrtimer_get_softirq_time(cpu_base);
 

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 95f6657..51a4dd0 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig

@@ -81,30 +81,35 @@
 	bool "Software Suspend"
 	depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
 	---help---
-	  Enable the possibility of suspending the machine.
-	  It doesn't need ACPI or APM.
-	  You may suspend your machine by 'swsusp' or 'shutdown -z <time>' 
-	  (patch for sysvinit needed). 
+	  Enable the suspend to disk (STD) functionality.
 
-	  It creates an image which is saved in your active swap. Upon next
+	  You can suspend your machine with 'echo disk > /sys/power/state'.
+	  Alternatively, you can use the additional userland tools available
+	  from <http://suspend.sf.net>.
+
+	  In principle it does not require ACPI or APM, although for example
+	  ACPI will be used if available.
+
+	  It creates an image which is saved in your active swap. Upon the next
 	  boot, pass the 'resume=/dev/swappartition' argument to the kernel to
 	  have it detect the saved image, restore memory state from it, and
 	  continue to run as before. If you do not want the previous state to
-	  be reloaded, then use the 'noresume' kernel argument. However, note
-	  that your partitions will be fsck'd and you must re-mkswap your swap
-	  partitions. It does not work with swap files.
+	  be reloaded, then use the 'noresume' kernel command line argument.
+	  Note, however, that fsck will be run on your filesystems and you will
+	  need to run mkswap against the swap partition used for the suspend.
 
-	  Right now you may boot without resuming and then later resume but
-	  in meantime you cannot use those swap partitions/files which were
-	  involved in suspending. Also in this case there is a risk that buffers
-	  on disk won't match with saved ones.
+	  It also works with swap files to a limited extent (for details see
+	  <file:Documentation/power/swsusp-and-swap-files.txt>).
+
+	  Right now you may boot without resuming and resume later but in the
+	  meantime you cannot use the swap partition(s)/file(s) involved in
+	  suspending.  Also in this case you must not use the filesystems
+	  that were mounted before the suspend.  In particular, you MUST NOT
+	  MOUNT any journaled filesystems mounted before the suspend or they
+	  will get corrupted in a nasty way.
 
 	  For more information take a look at <file:Documentation/power/swsusp.txt>.
 
-	  (For now, swsusp is incompatible with PAE aka HIGHMEM_64G on i386.
-	  we need identity mapping for resume to work, and that is trivial
-	  to get with 4MB pages, but less than trivial on PAE).
-
 config PM_STD_PARTITION
 	string "Default resume partition"
 	depends on SOFTWARE_SUSPEND

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 482b11f..bcd14e8 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c

@@ -60,19 +60,19 @@
 static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
 static char *torture_type = "rcu"; /* What RCU implementation to torture. */
 
-module_param(nreaders, int, 0);
+module_param(nreaders, int, 0444);
 MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
-module_param(nfakewriters, int, 0);
+module_param(nfakewriters, int, 0444);
 MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
-module_param(stat_interval, int, 0);
+module_param(stat_interval, int, 0444);
 MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
-module_param(verbose, bool, 0);
+module_param(verbose, bool, 0444);
 MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
-module_param(test_no_idle_hz, bool, 0);
+module_param(test_no_idle_hz, bool, 0444);
 MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
-module_param(shuffle_interval, int, 0);
+module_param(shuffle_interval, int, 0444);
 MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
-module_param(torture_type, charp, 0);
+module_param(torture_type, charp, 0444);
 MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
 
 #define TORTURE_FLAG "-torture:"

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 12b3efe..5567745 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c

@@ -284,6 +284,42 @@
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
+void tick_suspend_broadcast(void)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+	bc = tick_broadcast_device.evtdev;
+	if (bc && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+		clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
+
+	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+int tick_resume_broadcast(void)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+	int broadcast = 0;
+
+	spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+	bc = tick_broadcast_device.evtdev;
+	if (bc) {
+		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC &&
+		    !cpus_empty(tick_broadcast_mask))
+			tick_broadcast_start_periodic(bc);
+
+		broadcast = cpu_isset(smp_processor_id(), tick_broadcast_mask);
+	}
+	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+
+	return broadcast;
+}
+
+
 #ifdef CONFIG_TICK_ONESHOT
 
 static cpumask_t tick_broadcast_oneshot_mask;

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 0986a2b..43ba1bd 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c

@@ -298,6 +298,28 @@
 	spin_unlock_irqrestore(&tick_device_lock, flags);
 }
 
+static void tick_suspend_periodic(void)
+{
+	struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tick_device_lock, flags);
+	if (td->mode == TICKDEV_MODE_PERIODIC)
+		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+	spin_unlock_irqrestore(&tick_device_lock, flags);
+}
+
+static void tick_resume_periodic(void)
+{
+	struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tick_device_lock, flags);
+	if (td->mode == TICKDEV_MODE_PERIODIC)
+		tick_setup_periodic(td->evtdev, 0);
+	spin_unlock_irqrestore(&tick_device_lock, flags);
+}
+
 /*
  * Notification about clock event devices
  */
@@ -325,6 +347,16 @@
 		tick_shutdown(dev);
 		break;
 
+	case CLOCK_EVT_NOTIFY_SUSPEND:
+		tick_suspend_periodic();
+		tick_suspend_broadcast();
+		break;
+
+	case CLOCK_EVT_NOTIFY_RESUME:
+		if (!tick_resume_broadcast())
+			tick_resume_periodic();
+		break;
+
 	default:
 		break;
 	}

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 54861a0..75890ef 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h

@@ -67,6 +67,8 @@
 extern int tick_is_broadcast_device(struct clock_event_device *dev);
 extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
 extern void tick_shutdown_broadcast(unsigned int *cpup);
+extern void tick_suspend_broadcast(void);
+extern int tick_resume_broadcast(void);
 
 extern void
 tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
@@ -90,6 +92,8 @@
 static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
 static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
 static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
+static inline void tick_suspend_broadcast(void) { }
+static inline int tick_resume_broadcast(void) { return 0; }
 
 /*
  * Set the periodic handler in non broadcast mode

diff --git a/kernel/timer.c b/kernel/timer.c
index 8ad3842..797cccb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c

@@ -862,6 +862,8 @@
 	clock->error = 0;
 	ntp_clear();
 
+	update_vsyscall(&xtime, clock);
+
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
 	/* signal hrtimers about time change */
@@ -997,6 +999,9 @@
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
 	touch_softlockup_watchdog();
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
+
 	/* Resume hrtimers */
 	clock_was_set();
 
@@ -1011,6 +1016,9 @@
 	timekeeping_suspended = 1;
 	timekeeping_suspend_time = read_persistent_clock();
 	write_sequnlock_irqrestore(&xtime_lock, flags);
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+
 	return 0;
 }
 

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 8353829..b4db53f 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c

@@ -27,22 +27,26 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCDSP
 
+#define svc_serv_is_pooled(serv)    ((serv)->sv_function)
+
 /*
  * Mode for mapping cpus to pools.
  */
 enum {
-	SVC_POOL_NONE = -1,	/* uninitialised, choose one of the others */
+	SVC_POOL_AUTO = -1,	/* choose one of the others */
 	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
 				 * (legacy & UP mode) */
 	SVC_POOL_PERCPU,	/* one pool per cpu */
 	SVC_POOL_PERNODE	/* one pool per numa node */
 };
+#define SVC_POOL_DEFAULT	SVC_POOL_GLOBAL
 
 /*
  * Structure for mapping cpus to pools and vice versa.
  * Setup once during sunrpc initialisation.
  */
 static struct svc_pool_map {
+	int count;			/* How many svc_servs use us */
 	int mode;			/* Note: int not enum to avoid
 					 * warnings about "enumeration value
 					 * not handled in switch" */
@@ -50,9 +54,63 @@
 	unsigned int *pool_to;		/* maps pool id to cpu or node */
 	unsigned int *to_pool;		/* maps cpu or node to pool id */
 } svc_pool_map = {
-	.mode = SVC_POOL_NONE
+	.count = 0,
+	.mode = SVC_POOL_DEFAULT
 };
+static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
 
+static int
+param_set_pool_mode(const char *val, struct kernel_param *kp)
+{
+	int *ip = (int *)kp->arg;
+	struct svc_pool_map *m = &svc_pool_map;
+	int err;
+
+	mutex_lock(&svc_pool_map_mutex);
+
+	err = -EBUSY;
+	if (m->count)
+		goto out;
+
+	err = 0;
+	if (!strncmp(val, "auto", 4))
+		*ip = SVC_POOL_AUTO;
+	else if (!strncmp(val, "global", 6))
+		*ip = SVC_POOL_GLOBAL;
+	else if (!strncmp(val, "percpu", 6))
+		*ip = SVC_POOL_PERCPU;
+	else if (!strncmp(val, "pernode", 7))
+		*ip = SVC_POOL_PERNODE;
+	else
+		err = -EINVAL;
+
+out:
+	mutex_unlock(&svc_pool_map_mutex);
+	return err;
+}
+
+static int
+param_get_pool_mode(char *buf, struct kernel_param *kp)
+{
+	int *ip = (int *)kp->arg;
+
+	switch (*ip)
+	{
+	case SVC_POOL_AUTO:
+		return strlcpy(buf, "auto", 20);
+	case SVC_POOL_GLOBAL:
+		return strlcpy(buf, "global", 20);
+	case SVC_POOL_PERCPU:
+		return strlcpy(buf, "percpu", 20);
+	case SVC_POOL_PERNODE:
+		return strlcpy(buf, "pernode", 20);
+	default:
+		return sprintf(buf, "%d", *ip);
+	}
+}
+
+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+		 &svc_pool_map.mode, 0644);
 
 /*
  * Detect best pool mapping mode heuristically,
@@ -166,18 +224,25 @@
 
 
 /*
- * Build the global map of cpus to pools and vice versa.
+ * Add a reference to the global map of cpus to pools (and
+ * vice versa).  Initialise the map if we're the first user.
+ * Returns the number of pools.
  */
 static unsigned int
-svc_pool_map_init(void)
+svc_pool_map_get(void)
 {
 	struct svc_pool_map *m = &svc_pool_map;
 	int npools = -1;
 
-	if (m->mode != SVC_POOL_NONE)
-		return m->npools;
+	mutex_lock(&svc_pool_map_mutex);
 
-	m->mode = svc_pool_map_choose_mode();
+	if (m->count++) {
+		mutex_unlock(&svc_pool_map_mutex);
+		return m->npools;
+	}
+
+	if (m->mode == SVC_POOL_AUTO)
+		m->mode = svc_pool_map_choose_mode();
 
 	switch (m->mode) {
 	case SVC_POOL_PERCPU:
@@ -195,9 +260,36 @@
 	}
 	m->npools = npools;
 
+	mutex_unlock(&svc_pool_map_mutex);
 	return m->npools;
 }
 
+
+/*
+ * Drop a reference to the global map of cpus to pools.
+ * When the last reference is dropped, the map data is
+ * freed; this allows the sysadmin to change the pool
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ */
+static void
+svc_pool_map_put(void)
+{
+	struct svc_pool_map *m = &svc_pool_map;
+
+	mutex_lock(&svc_pool_map_mutex);
+
+	if (!--m->count) {
+		m->mode = SVC_POOL_DEFAULT;
+		kfree(m->to_pool);
+		kfree(m->pool_to);
+		m->npools = 0;
+	}
+
+	mutex_unlock(&svc_pool_map_mutex);
+}
+
+
 /*
  * Set the current thread's cpus_allowed mask so that it
  * will only run on cpus in the given pool.
@@ -212,10 +304,9 @@
 
 	/*
 	 * The caller checks for sv_nrpools > 1, which
-	 * implies that we've been initialized and the
-	 * map mode is not NONE.
+	 * implies that we've been initialized.
 	 */
-	BUG_ON(m->mode == SVC_POOL_NONE);
+	BUG_ON(m->count == 0);
 
 	switch (m->mode)
 	{
@@ -246,18 +337,19 @@
 	unsigned int pidx = 0;
 
 	/*
-	 * SVC_POOL_NONE happens in a pure client when
+	 * An uninitialised map happens in a pure client when
 	 * lockd is brought up, so silently treat it the
 	 * same as SVC_POOL_GLOBAL.
 	 */
-
-	switch (m->mode) {
-	case SVC_POOL_PERCPU:
-		pidx = m->to_pool[cpu];
-		break;
-	case SVC_POOL_PERNODE:
-		pidx = m->to_pool[cpu_to_node(cpu)];
-		break;
+	if (svc_serv_is_pooled(serv)) {
+		switch (m->mode) {
+		case SVC_POOL_PERCPU:
+			pidx = m->to_pool[cpu];
+			break;
+		case SVC_POOL_PERNODE:
+			pidx = m->to_pool[cpu_to_node(cpu)];
+			break;
+		}
 	}
 	return &serv->sv_pools[pidx % serv->sv_nrpools];
 }
@@ -347,7 +439,7 @@
 		  svc_thread_fn func, int sig, struct module *mod)
 {
 	struct svc_serv *serv;
-	unsigned int npools = svc_pool_map_init();
+	unsigned int npools = svc_pool_map_get();
 
 	serv = __svc_create(prog, bufsize, npools, shutdown);
 
@@ -367,6 +459,7 @@
 svc_destroy(struct svc_serv *serv)
 {
 	struct svc_sock	*svsk;
+	struct svc_sock *tmp;
 
 	dprintk("svc: svc_destroy(%s, %d)\n",
 				serv->sv_program->pg_name,
@@ -382,24 +475,23 @@
 
 	del_timer_sync(&serv->sv_temptimer);
 
-	while (!list_empty(&serv->sv_tempsocks)) {
-		svsk = list_entry(serv->sv_tempsocks.next,
-				  struct svc_sock,
-				  sk_list);
-		svc_close_socket(svsk);
-	}
+	list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
+		svc_force_close_socket(svsk);
+
 	if (serv->sv_shutdown)
 		serv->sv_shutdown(serv);
 
-	while (!list_empty(&serv->sv_permsocks)) {
-		svsk = list_entry(serv->sv_permsocks.next,
-				  struct svc_sock,
-				  sk_list);
-		svc_close_socket(svsk);
-	}
+	list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
+		svc_force_close_socket(svsk);
+
+	BUG_ON(!list_empty(&serv->sv_permsocks));
+	BUG_ON(!list_empty(&serv->sv_tempsocks));
 
 	cache_clean_deferred(serv);
 
+	if (svc_serv_is_pooled(serv))
+		svc_pool_map_put();
+
 	/* Unregister service with the portmapper */
 	svc_register(serv, 0, 0);
 	kfree(serv->sv_pools);

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 63ae947..f6e1eb1 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c

@@ -82,6 +82,7 @@
 static void		svc_udp_data_ready(struct sock *, int);
 static int		svc_udp_recvfrom(struct svc_rqst *);
 static int		svc_udp_sendto(struct svc_rqst *);
+static void		svc_close_socket(struct svc_sock *svsk);
 
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
@@ -131,13 +132,13 @@
 			NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
 			htons(((struct sockaddr_in *) addr)->sin_port));
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
 	case AF_INET6:
 		snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
 			NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
 			htons(((struct sockaddr_in6 *) addr)->sin6_port));
 		break;
-#endif
+
 	default:
 		snprintf(buf, len, "unknown address type: %d", addr->sa_family);
 		break;
@@ -449,9 +450,7 @@
 
 union svc_pktinfo_u {
 	struct in_pktinfo pkti;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct in6_pktinfo pkti6;
-#endif
 };
 
 static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
@@ -467,7 +466,7 @@
 			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
 	case AF_INET6: {
 			struct in6_pktinfo *pki = CMSG_DATA(cmh);
 
@@ -479,7 +478,6 @@
 			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 		}
 		break;
-#endif
 	}
 	return;
 }
@@ -721,45 +719,21 @@
 	}
 }
 
-static void svc_udp_get_sender_address(struct svc_rqst *rqstp,
-					struct sk_buff *skb)
+static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
+					    struct cmsghdr *cmh)
 {
 	switch (rqstp->rq_sock->sk_sk->sk_family) {
 	case AF_INET: {
-		/* this seems to come from net/ipv4/udp.c:udp_recvmsg */
-			struct sockaddr_in *sin = svc_addr_in(rqstp);
-
-			sin->sin_family = AF_INET;
-			sin->sin_port = skb->h.uh->source;
-			sin->sin_addr.s_addr = skb->nh.iph->saddr;
-			rqstp->rq_addrlen = sizeof(struct sockaddr_in);
-			/* Remember which interface received this request */
-			rqstp->rq_daddr.addr.s_addr = skb->nh.iph->daddr;
-		}
+		struct in_pktinfo *pki = CMSG_DATA(cmh);
+		rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		}
 	case AF_INET6: {
-		/* this is derived from net/ipv6/udp.c:udpv6_recvmesg */
-			struct sockaddr_in6 *sin6 = svc_addr_in6(rqstp);
-
-			sin6->sin6_family = AF_INET6;
-			sin6->sin6_port = skb->h.uh->source;
-			sin6->sin6_flowinfo = 0;
-			sin6->sin6_scope_id = 0;
-			if (ipv6_addr_type(&sin6->sin6_addr) &
-							IPV6_ADDR_LINKLOCAL)
-				sin6->sin6_scope_id = IP6CB(skb)->iif;
-			ipv6_addr_copy(&sin6->sin6_addr,
-							&skb->nh.ipv6h->saddr);
-			rqstp->rq_addrlen = sizeof(struct sockaddr_in);
-			/* Remember which interface received this request */
-			ipv6_addr_copy(&rqstp->rq_daddr.addr6,
-							&skb->nh.ipv6h->saddr);
-		}
+		struct in6_pktinfo *pki = CMSG_DATA(cmh);
+		ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
 		break;
-#endif
+		}
 	}
-	return;
 }
 
 /*
@@ -771,7 +745,15 @@
 	struct svc_sock	*svsk = rqstp->rq_sock;
 	struct svc_serv	*serv = svsk->sk_server;
 	struct sk_buff	*skb;
+	char		buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
+	struct cmsghdr *cmh = (struct cmsghdr *)buffer;
 	int		err, len;
+	struct msghdr msg = {
+		.msg_name = svc_addr(rqstp),
+		.msg_control = cmh,
+		.msg_controllen = sizeof(buffer),
+		.msg_flags = MSG_DONTWAIT,
+	};
 
 	if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
 	    /* udp sockets need large rcvbuf as all pending
@@ -797,7 +779,9 @@
 	}
 
 	clear_bit(SK_DATA, &svsk->sk_flags);
-	while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
+	while ((err == kernel_recvmsg(svsk->sk_sock, &msg, NULL,
+				      0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
+	       (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
 		if (err == -EAGAIN) {
 			svc_sock_received(svsk);
 			return err;
@@ -805,6 +789,7 @@
 		/* possibly an icmp error */
 		dprintk("svc: recvfrom returned error %d\n", -err);
 	}
+	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
 	if (skb->tstamp.off_sec == 0) {
 		struct timeval tv;
 
@@ -827,7 +812,16 @@
 
 	rqstp->rq_prot = IPPROTO_UDP;
 
-	svc_udp_get_sender_address(rqstp, skb);
+	if (cmh->cmsg_level != IPPROTO_IP ||
+	    cmh->cmsg_type != IP_PKTINFO) {
+		if (net_ratelimit())
+			printk("rpcsvc: received unknown control message:"
+			       "%d/%d\n",
+			       cmh->cmsg_level, cmh->cmsg_type);
+		skb_free_datagram(svsk->sk_sk, skb);
+		return 0;
+	}
+	svc_udp_get_dest_address(rqstp, cmh);
 
 	if (skb_is_nonlinear(skb)) {
 		/* we have to copy */
@@ -884,6 +878,9 @@
 static void
 svc_udp_init(struct svc_sock *svsk)
 {
+	int one = 1;
+	mm_segment_t oldfs;
+
 	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
 	svsk->sk_recvfrom = svc_udp_recvfrom;
@@ -899,6 +896,13 @@
 
 	set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
 	set_bit(SK_CHNGBUF, &svsk->sk_flags);
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	/* make sure we get destination address info */
+	svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
+				       (char __user *)&one, sizeof(one));
+	set_fs(oldfs);
 }
 
 /*
@@ -977,11 +981,9 @@
 	case AF_INET:
 		return ntohs(((struct sockaddr_in *)sin)->sin_port)
 			< PROT_SOCK;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
 		return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
 			< PROT_SOCK;
-#endif
 	default:
 		return 0;
 	}
@@ -1786,7 +1788,7 @@
 	spin_unlock_bh(&serv->sv_lock);
 }
 
-void svc_close_socket(struct svc_sock *svsk)
+static void svc_close_socket(struct svc_sock *svsk)
 {
 	set_bit(SK_CLOSE, &svsk->sk_flags);
 	if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
@@ -1799,6 +1801,19 @@
 	svc_sock_put(svsk);
 }
 
+void svc_force_close_socket(struct svc_sock *svsk)
+{
+	set_bit(SK_CLOSE, &svsk->sk_flags);
+	if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+		/* Waiting to be processed, but no threads left,
+		 * So just remove it from the waiting list
+		 */
+		list_del_init(&svsk->sk_ready);
+		clear_bit(SK_BUSY, &svsk->sk_flags);
+	}
+	svc_close_socket(svsk);
+}
+
 /**
  * svc_makesock - Make a socket for nfsd and lockd
  * @serv: RPC server structure
commit	205c911da322908abe127b96d2ef2a4a2aa5109a	[log] [tgz]
author	Linus Torvalds <torvalds@woody.linux-foundation.org>	Tue Mar 06 17:30:59 2007 -0800
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	Tue Mar 06 17:30:59 2007 -0800
tree	43f6878c6a255965a4061d05da68dbc1df9e0ca4
parent	c7276fde27bca89798f33c0be9543dc108468788 [diff]
parent	f3be97427172856d6865ddfedea84fa3a9f33227 [diff]