Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound-2.6: (377 commits)
  ASoC: au1x: PSC-AC97 bugfixes
  ALSA: dummy - Increase MAX_PCM_SUBSTREAMS to 128
  ALSA: dummy - Add debug proc file
  ALSA: Add const prefix to proc helper functions
  ALSA: Re-export snd_pcm_format_name() function
  ALSA: hda - Use auto model for HP laptops with ALC268 codec
  ALSA: cs46xx - Fix minimum period size
  ASoC: Fix WM835x Out4 capture enumeration
  ALSA: Remove unneeded ifdef from sound/core.h
  ALSA: Remove struct snd_monitor_file from public sound/core.h
  ASoC: Remove unused hw_read_t
  sound: oxygen: work around MCE when changing volume
  ALSA: dummy - Fake buffer allocations
  ALSA: hda/realtek: Added support for CLEVO M540R subsystem, 6 channel + digital
  ASoC: fix pxa2xx-ac97.c breakage
  ALSA: dummy - Fix the timer calculation in systimer mode
  ALSA: dummy - Add more description
  ALSA: dummy - Better jiffies handling
  ALSA: dummy - Support high-res timer mode
  ALSA: Release v1.0.21
  ...
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index b56aacc..e4dbbdb 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -26,7 +26,7 @@
 	- Notes on accessing payload contents
 	- Defining a key type
 	- Request-key callback service
-	- Key access filesystem
+	- Garbage collection
 
 
 ============
@@ -113,6 +113,9 @@
 
      (*) Dead. The key's type was unregistered, and so the key is now useless.
 
+Keys in the last three states are subject to garbage collection.  See the
+section on "Garbage collection".
+
 
 ====================
 KEY SERVICE OVERVIEW
@@ -754,6 +757,26 @@
      successful.
 
 
+ (*) Install the calling process's session keyring on its parent.
+
+	long keyctl(KEYCTL_SESSION_TO_PARENT);
+
+     This function attempts to install the calling process's session keyring
+     onto the calling process's parent, replacing the parent's current session
+     keyring.
+
+     The calling process must have the same ownership as its parent, the
+     keyring must have the same ownership as the calling process, the calling
+     process must have LINK permission on the keyring, and the active LSM
+     module must not deny permission; otherwise error EPERM will be returned.
+
+     Error ENOMEM will be returned if there was insufficient memory to
+     complete the operation; otherwise 0 will be returned to indicate success.
+
+     The keyring will be replaced the next time the parent process leaves the
+     kernel and resumes executing userspace code.
+
+
 ===============
 KERNEL SERVICES
 ===============
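
A minimal user-space sketch of the new operation (not part of the patch;
it assumes <linux/keyctl.h> exports KEYCTL_SESSION_TO_PARENT and uses the
raw syscall, since keyctl() has no dedicated libc wrapper):

	#include <linux/keyctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdio.h>

	int main(void)
	{
		/* Ask the kernel to install our session keyring on our
		 * parent; it takes effect when the parent next returns
		 * to userspace. */
		if (syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT) < 0) {
			perror("keyctl(KEYCTL_SESSION_TO_PARENT)");
			return 1;
		}
		return 0;
	}

A typical caller is a "start a new session keyring" helper that wants the
invoking shell, not just the helper process, to pick up the new keyring.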
@@ -1231,3 +1254,17 @@
 
 In this case, the program isn't required to actually attach the key to a ring;
 the rings are provided for reference.
+
+
+==================
+GARBAGE COLLECTION
+==================
+
+Dead keys (for which the type has been removed) will be automatically unlinked
+from those keyrings that point to them and deleted as soon as possible by a
+background garbage collector.
+
+Similarly, revoked and expired keys will be garbage collected, but only after a
+certain amount of time has passed.  This time is set as a number of seconds in:
+
+	/proc/sys/kernel/keys/gc_delay
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index 8906803..34f6638 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -27,6 +27,13 @@
 
   # echo scan > /sys/kernel/debug/kmemleak
 
+To clear the list of all current possible memory leaks:
+
+  # echo clear > /sys/kernel/debug/kmemleak
+
+New leaks will then show up the next time /sys/kernel/debug/kmemleak is
+read.
+
 Note that the orphan objects are listed in the order they were allocated
 and one object at the beginning of the list may cause other subsequent
 objects to be reported as orphan.
@@ -42,6 +49,9 @@
   scan=<secs>	- set the automatic memory scanning period in seconds
 		  (default 600, 0 to stop the automatic scanning)
   scan		- trigger a memory scan
+  clear		- clear the list of current memory leak suspects, done by
+		  marking all currently reported unreferenced objects grey
+  dump=<addr>	- dump information about the object found at <addr>
 
 Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
 the kernel command line.
@@ -86,6 +96,27 @@
 address inside the block address range that need to be found so that the
 block is not considered a leak. One example is __vmalloc().
 
+Testing specific sections with kmemleak
+---------------------------------------
+
+Upon initial bootup your /sys/kernel/debug/kmemleak output may be quite
+extensive. This can also be the case if you have very buggy code during
+development. To work around these situations you can use the
+'clear' command to clear all reported unreferenced objects from the
+/sys/kernel/debug/kmemleak output. By issuing a 'scan' after a 'clear'
+you can find new unreferenced objects; this should help with testing
+specific sections of code.
+
+To test a critical section on demand with a clean kmemleak, do:
+
+  # echo clear > /sys/kernel/debug/kmemleak
+  ... test your kernel or modules ...
+  # echo scan > /sys/kernel/debug/kmemleak
+
+Then get your report as usual with:
+
+  # cat /sys/kernel/debug/kmemleak
+
 Kmemleak API
 ------------
 
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
index 2d10053..ae66f9b 100644
--- a/Documentation/s390/s390dbf.txt
+++ b/Documentation/s390/s390dbf.txt
@@ -495,6 +495,13 @@
 string plus two varargs one would need to allocate a (3 * sizeof(long)) 
 byte data area in the debug_register() function.
 
+IMPORTANT: Using "%s" in sprintf event functions is dangerous. You may only
+use "%s" in the sprintf event functions if the memory for the passed string
+remains available for as long as the debug feature exists. The reason is
+that, for performance, only a pointer to the string is stored in the debug
+feature. If you log a string that is freed afterwards, you will get an OOPS
+when inspecting the debug feature, because the debug feature will then
+access the already freed memory.
 
 NOTE: If using the sprintf view do NOT use other event/exception functions
 than the sprintf-event and -exception functions.
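
To illustrate the warning above, a short hedged sketch (the transient
buffer and the sample() function are hypothetical; debug_sprintf_event()
is the s390 debug API documented here):

	#include <asm/debug.h>
	#include <linux/slab.h>

	static const char *state = "online";	/* static: safe for "%s" */

	void sample(debug_info_t *id)
	{
		char *tmp = kstrdup("transient", GFP_KERNEL);

		/* OK: the string outlives the debug feature */
		debug_sprintf_event(id, 1, "state: %s", state);

		/* Dangerous: only the pointer is recorded ... */
		debug_sprintf_event(id, 1, "buffer: %s", tmp);
		kfree(tmp);	/* ... the view will later read freed memory */
	}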
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 322a00bb..2dbff53 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -19,6 +19,7 @@
 show up in /proc/sys/kernel:
 - acpi_video_flags
 - acct
+- callhome		     [ S390 only ]
 - auto_msgmni
 - core_pattern
 - core_uses_pid
@@ -91,6 +92,21 @@
 
 ==============================================================
 
+callhome:
+
+Controls the kernel's callhome behavior in case of a kernel panic.
+
+The s390 hardware allows an operating system to send a notification
+to a service organization (callhome) in case of an operating system panic.
+
+When the value in this file is 0 (the default), nothing happens in case
+of a kernel panic. If the value is set to "1", the complete kernel oops
+message is sent to the IBM customer service organization, provided the
+mainframe the Linux operating system is running on has a service
+contract with IBM.
+
+==============================================================
+
 core_pattern:
 
 core_pattern is used to specify a core dumpfile pattern name.
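
A small user-space sketch for flipping the switch (the proc path is taken
from the text above; error handling kept minimal):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/callhome", "w");

		if (!f) {
			perror("/proc/sys/kernel/callhome");
			return 1;
		}
		fputs("1\n", f);	/* enable callhome on kernel panic */
		return fclose(f) ? 1 : 0;
	}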
diff --git a/MAINTAINERS b/MAINTAINERS
index 8dca9d8..989ff11 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -439,7 +439,7 @@
 AMSO1100 RNIC DRIVER
 M:	Tom Tucker <tom@opengridcomputing.com>
 M:	Steve Wise <swise@opengridcomputing.com>
-L:	general@lists.openfabrics.org
+L:	linux-rdma@vger.kernel.org
 S:	Maintained
 F:	drivers/infiniband/hw/amso1100/
 
@@ -1494,7 +1494,7 @@
 
 CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
 M:	Steve Wise <swise@chelsio.com>
-L:	general@lists.openfabrics.org
+L:	linux-rdma@vger.kernel.org
 W:	http://www.openfabrics.org
 S:	Supported
 F:	drivers/infiniband/hw/cxgb3/
@@ -1868,7 +1868,7 @@
 EHCA (IBM GX bus InfiniBand adapter) DRIVER
 M:	Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 M:	Christoph Raisch <raisch@de.ibm.com>
-L:	general@lists.openfabrics.org
+L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/ehca/
 
@@ -2552,7 +2552,7 @@
 M:	Roland Dreier <rolandd@cisco.com>
 M:	Sean Hefty <sean.hefty@intel.com>
 M:	Hal Rosenstock <hal.rosenstock@gmail.com>
-L:	general@lists.openfabrics.org (moderated for non-subscribers)
+L:	linux-rdma@vger.kernel.org
 W:	http://www.openib.org/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
 S:	Supported
@@ -2729,7 +2729,7 @@
 
 IPATH DRIVER
 M:	Ralph Campbell <infinipath@qlogic.com>
-L:	general@lists.openfabrics.org
+L:	linux-rdma@vger.kernel.org
 T:	git git://git.qlogic.com/ipath-linux-2.6
 S:	Supported
 F:	drivers/infiniband/hw/ipath/
@@ -3485,7 +3485,7 @@
 NETEFFECT IWARP RNIC DRIVER (IW_NES)
 M:	Faisal Latif <faisal.latif@intel.com>
 M:	Chien Tung <chien.tin.tung@intel.com>
-L:	general@lists.openfabrics.org
+L:	linux-rdma@vger.kernel.org
 W:	http://www.neteffect.com
 S:	Supported
 F:	drivers/infiniband/hw/nes/
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 60c83ab..5076a88 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -75,6 +75,7 @@
 #define TIF_UAC_SIGBUS		7
 #define TIF_MEMDIE		8
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
+#define TIF_NOTIFY_RESUME	10	/* callback before returning to user */
 #define TIF_FREEZE		16	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -82,10 +83,12 @@
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 /* Work to do on interrupt/exception return.  */
-#define _TIF_WORK_MASK		(_TIF_SIGPENDING | _TIF_NEED_RESCHED)
+#define _TIF_WORK_MASK		(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+				 _TIF_NOTIFY_RESUME)
 
 /* Work to do on any return to userspace.  */
 #define _TIF_ALLWORK_MASK	(_TIF_WORK_MASK		\
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index df65eaa..0932dbb 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -20,6 +20,7 @@
 #include <linux/binfmts.h>
 #include <linux/bitops.h>
 #include <linux/syscalls.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/sigcontext.h>
@@ -683,4 +684,11 @@
 {
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal(regs, sw, r0, r19);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
 }
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 73394e5..d3a39b1 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -130,11 +130,13 @@
  *  TIF_SYSCALL_TRACE	- syscall trace active
  *  TIF_SIGPENDING	- signal pending
  *  TIF_NEED_RESCHED	- rescheduling necessary
+ *  TIF_NOTIFY_RESUME	- callback before returning to user
  *  TIF_USEDFPU		- FPU was used by this task this quantum (SMP)
  *  TIF_POLLING_NRFLAG	- true if poll_idle() is polling TIF_NEED_RESCHED
  */
 #define TIF_SIGPENDING		0
 #define TIF_NEED_RESCHED	1
+#define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_SYSCALL_TRACE	8
 #define TIF_POLLING_NRFLAG	16
 #define TIF_USING_IWMMXT	17
@@ -143,6 +145,7 @@
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 8c3de1a..7813ab7 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -51,7 +51,7 @@
 work_pending:
 	tst	r1, #_TIF_NEED_RESCHED
 	bne	work_resched
-	tst	r1, #_TIF_SIGPENDING
+	tst	r1, #_TIF_SIGPENDING|_TIF_NOTIFY_RESUME
 	beq	no_work_pending
 	mov	r0, sp				@ 'regs'
 	mov	r2, why				@ 'syscall'
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index f6bc5d4..b76fe06 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -12,6 +12,7 @@
 #include <linux/personality.h>
 #include <linux/freezer.h>
 #include <linux/uaccess.h>
+#include <linux/tracehook.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -707,4 +708,11 @@
 {
 	if (thread_flags & _TIF_SIGPENDING)
 		do_signal(&current->blocked, regs, syscall);
+
+	if (thread_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
 }
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index fc42de5..fd0c5d7 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -84,6 +84,7 @@
 #define TIF_MEMDIE		6
 #define TIF_RESTORE_SIGMASK	7	/* restore signal mask in do_signal */
 #define TIF_CPU_GOING_TO_SLEEP	8	/* CPU is entering sleep 0 mode */
+#define TIF_NOTIFY_RESUME	9	/* callback before returning to user */
 #define TIF_FREEZE		29
 #define TIF_DEBUG		30	/* debugging enabled */
 #define TIF_USERSPACE		31      /* true if FS sets userspace */
@@ -96,6 +97,7 @@
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 /* Note: The masks below must never span more than 16 bits! */
@@ -103,13 +105,15 @@
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK				\
 	((1 << TIF_SIGPENDING)			\
+	 | _TIF_NOTIFY_RESUME			\
 	 | (1 << TIF_NEED_RESCHED)		\
 	 | (1 << TIF_POLLING_NRFLAG)		\
 	 | (1 << TIF_BREAKPOINT)		\
 	 | (1 << TIF_RESTORE_SIGMASK))
 
 /* work to do on any return to userspace */
-#define _TIF_ALLWORK_MASK	(_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE))
+#define _TIF_ALLWORK_MASK	(_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE) | \
+				 _TIF_NOTIFY_RESUME)
 /* work to do on return from debug mode */
 #define _TIF_DBGWORK_MASK	(_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT))
 
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
index 009a801..169268c 100644
--- a/arch/avr32/kernel/entry-avr32b.S
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -281,7 +281,7 @@
 	ld.w	r1, r0[TI_flags]
 	rjmp	1b
 
-2:	mov	r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+2:	mov	r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NOTIFY_RESUME
 	tst	r1, r2
 	breq	3f
 	unmask_interrupts
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index 2722756..64f886f 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -16,6 +16,7 @@
 #include <linux/ptrace.h>
 #include <linux/unistd.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/ucontext.h>
@@ -322,4 +323,11 @@
 
 	if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal(regs, &current->blocked, syscall);
+
+	if (ti->flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
 }
diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c
index b326023..48b0f39 100644
--- a/arch/cris/kernel/ptrace.c
+++ b/arch/cris/kernel/ptrace.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -36,4 +37,11 @@
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(canrestart,regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
 }
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 4a7a62c..6b0a2b6 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -572,6 +572,8 @@
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(__frame);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
 	}
 
 } /* end do_notify_resume() */
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index 8bbc8b0..70e67e47 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -89,6 +89,7 @@
 					   TIF_NEED_RESCHED */
 #define TIF_MEMDIE		4
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
+#define TIF_NOTIFY_RESUME	6	/* callback before returning to user */
 #define TIF_FREEZE		16	/* is freezing for suspend */
 
 /* as above, but as bit values */
@@ -97,6 +98,7 @@
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index cf3472f..af842c3 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -39,6 +39,7 @@
 #include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 
 #include <asm/setup.h>
 #include <asm/uaccess.h>
@@ -552,4 +553,11 @@
 {
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal(regs, NULL);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
 }
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 5d7c0e5..89969e9 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -192,6 +192,8 @@
 	if (test_thread_flag(TIF_NOTIFY_RESUME)) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(&scr->pt);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
 	}
 
 	/* copy user rbs to kernel rbs */
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index 07bb5bd..7157815 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -149,6 +149,7 @@
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
 #define TIF_SINGLESTEP		3	/* restore singlestep on return to user mode */
 #define TIF_IRET		4	/* return with iret */
+#define TIF_NOTIFY_RESUME	5	/* callback before returning to user */
 #define TIF_RESTORE_SIGMASK	8	/* restore signal mask in do_signal() */
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
@@ -160,6 +161,7 @@
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
 #define _TIF_IRET		(1<<TIF_IRET)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 1812454..144b0f12 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -21,6 +21,7 @@
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
@@ -408,5 +409,12 @@
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs,oldset);
 
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
+
 	clear_thread_flag(TIF_IRET);
 }
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index f9df720..01cc163 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -115,6 +115,7 @@
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
 #define TIF_SYSCALL_AUDIT	3	/* syscall auditing active */
 #define TIF_SECCOMP		4	/* secure computing */
+#define TIF_NOTIFY_RESUME	5	/* callback before returning to user */
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
@@ -139,6 +140,7 @@
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 830c5ef..6254041 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -21,6 +21,7 @@
 #include <linux/compiler.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
+#include <linux/tracehook.h>
 
 #include <asm/abi.h>
 #include <asm/asm.h>
@@ -700,4 +701,11 @@
 	/* deal with pending signal delivery */
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
 }
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index feb2f2e..a21f43b 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -568,5 +568,7 @@
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(__frame);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
 	}
 }
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index 4ce0edf..ac775a7 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -59,6 +59,7 @@
 #define TIF_MEMDIE		5
 #define TIF_RESTORE_SIGMASK	6	/* restore saved signal mask */
 #define TIF_FREEZE		7	/* is freezing for suspend */
+#define TIF_NOTIFY_RESUME	8	/* callback before returning to user */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -67,8 +68,9 @@
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 
-#define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | \
+#define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
                                  _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
 
 #endif /* __KERNEL__ */
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index e552e54..8c4712b 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -948,7 +948,7 @@
 	/* As above */
 	mfctl   %cr30,%r1
 	LDREG	TI_FLAGS(%r1),%r19
-	ldi	(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK), %r20
+	ldi	(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME), %r20
 	and,COND(<>)	%r19, %r20, %r0
 	b,n	intr_restore	/* skip past if we've nothing to do */
 
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index f825442..8eb3c63 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -25,6 +25,7 @@
 #include <linux/stddef.h>
 #include <linux/compat.h>
 #include <linux/elf.h>
+#include <linux/tracehook.h>
 #include <asm/ucontext.h>
 #include <asm/rt_sigframe.h>
 #include <asm/uaccess.h>
@@ -645,4 +646,11 @@
 	if (test_thread_flag(TIF_SIGPENDING) ||
 	    test_thread_flag(TIF_RESTORE_SIGMASK))
 		do_signal(regs, in_syscall);
+
+	if (test_thread_flag(TIF_NOTIFY_RESUME)) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
 }
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2ae5d72..e030e86 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -95,7 +95,6 @@
 	select HAVE_ARCH_TRACEHOOK
 	select INIT_ALL_POSSIBLE
 	select HAVE_PERF_COUNTERS
-	select GENERIC_ATOMIC64 if !64BIT
 
 config SCHED_OMIT_FRAME_POINTER
 	bool
@@ -481,13 +480,6 @@
 	  Select this option to enable the special message interface to
 	  the cooperative memory management.
 
-config PAGE_STATES
-	bool "Unused page notification"
-	help
-	  This enables the notification of unused pages to the
-	  hypervisor. The ESSA instruction is used to do the states
-	  changes between a page that has content and the unused state.
-
 config APPLDATA_BASE
 	bool "Linux - VM Monitor Stream, base infrastructure"
 	depends on PROC_FS
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 0ff387c..fc8fb20 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -88,8 +88,7 @@
 head-y		:= arch/s390/kernel/head.o arch/s390/kernel/init_task.o
 
 core-y		+= arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
-		   arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ \
-		   arch/s390/power/
+		   arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
 
 libs-y		+= arch/s390/lib/
 drivers-y	+= drivers/s390/
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 5a805df..bd9914b 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -355,11 +355,7 @@
 {
 	struct dentry *dentry;
 	struct inode *inode;
-	struct qstr qname;
 
-	qname.name = name;
-	qname.len = strlen(name);
-	qname.hash = full_name_hash(name, qname.len);
 	mutex_lock(&parent->d_inode->i_mutex);
 	dentry = lookup_one_len(name, parent, strlen(name));
 	if (IS_ERR(dentry)) {
@@ -426,7 +422,7 @@
 	char tmp[TMP_SIZE];
 	struct dentry *dentry;
 
-	snprintf(tmp, TMP_SIZE, "%lld\n", (unsigned long long int)value);
+	snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value);
 	buffer = kstrdup(tmp, GFP_KERNEL);
 	if (!buffer)
 		return ERR_PTR(-ENOMEM);
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index c7d0abf..ae7c8f9 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -1,33 +1,23 @@
 #ifndef __ARCH_S390_ATOMIC__
 #define __ARCH_S390_ATOMIC__
 
+/*
+ * Copyright 1999,2009 IBM Corp.
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *	      Denis Joseph Barrow,
+ *	      Arnd Bergmann <arndb@de.ibm.com>,
+ *
+ * Atomic operations that C can't guarantee us.
+ * Useful for resource counting etc.
+ * s390 uses 'Compare And Swap' for atomicity in an SMP environment.
+ *
+ */
+
 #include <linux/compiler.h>
 #include <linux/types.h>
 
-/*
- *  include/asm-s390/atomic.h
- *
- *  S390 version
- *    Copyright (C) 1999-2005 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *               Denis Joseph Barrow,
- *		 Arnd Bergmann (arndb@de.ibm.com)
- *
- *  Derived from "include/asm-i386/bitops.h"
- *    Copyright (C) 1992, Linus Torvalds
- *
- */
-
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- * S390 uses 'Compare And Swap' for atomicity in SMP enviroment
- */
-
 #define ATOMIC_INIT(i)  { (i) }
 
-#ifdef __KERNEL__
-
 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
 
 #define __CS_LOOP(ptr, op_val, op_string) ({				\
@@ -77,7 +67,7 @@
 	barrier();
 }
 
-static __inline__ int atomic_add_return(int i, atomic_t * v)
+static inline int atomic_add_return(int i, atomic_t *v)
 {
 	return __CS_LOOP(v, i, "ar");
 }
@@ -87,7 +77,7 @@
 #define atomic_inc_return(_v)		atomic_add_return(1, _v)
 #define atomic_inc_and_test(_v)		(atomic_add_return(1, _v) == 0)
 
-static __inline__ int atomic_sub_return(int i, atomic_t * v)
+static inline int atomic_sub_return(int i, atomic_t *v)
 {
 	return __CS_LOOP(v, i, "sr");
 }
@@ -97,19 +87,19 @@
 #define atomic_dec_return(_v)		atomic_sub_return(1, _v)
 #define atomic_dec_and_test(_v)		(atomic_sub_return(1, _v) == 0)
 
-static __inline__ void atomic_clear_mask(unsigned long mask, atomic_t * v)
+static inline void atomic_clear_mask(unsigned long mask, atomic_t *v)
 {
-	       __CS_LOOP(v, ~mask, "nr");
+	__CS_LOOP(v, ~mask, "nr");
 }
 
-static __inline__ void atomic_set_mask(unsigned long mask, atomic_t * v)
+static inline void atomic_set_mask(unsigned long mask, atomic_t *v)
 {
-	       __CS_LOOP(v, mask, "or");
+	__CS_LOOP(v, mask, "or");
 }
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new)
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
 	asm volatile(
@@ -127,7 +117,7 @@
 	return old;
 }
 
-static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
@@ -146,9 +136,10 @@
 
 #undef __CS_LOOP
 
-#ifdef __s390x__
 #define ATOMIC64_INIT(i)  { (i) }
 
+#ifdef CONFIG_64BIT
+
 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
 
 #define __CSG_LOOP(ptr, op_val, op_string) ({				\
@@ -162,7 +153,7 @@
 		: "=&d" (old_val), "=&d" (new_val),			\
 		  "=Q" (((atomic_t *)(ptr))->counter)			\
 		: "d" (op_val),	"Q" (((atomic_t *)(ptr))->counter)	\
-		: "cc", "memory" );					\
+		: "cc", "memory");					\
 	new_val;							\
 })
 
@@ -180,7 +171,7 @@
 		  "=m" (((atomic_t *)(ptr))->counter)			\
 		: "a" (ptr), "d" (op_val),				\
 		  "m" (((atomic_t *)(ptr))->counter)			\
-		: "cc", "memory" );					\
+		: "cc", "memory");					\
 	new_val;							\
 })
 
@@ -198,39 +189,29 @@
 	barrier();
 }
 
-static __inline__ long long atomic64_add_return(long long i, atomic64_t * v)
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
 {
 	return __CSG_LOOP(v, i, "agr");
 }
-#define atomic64_add(_i, _v)		atomic64_add_return(_i, _v)
-#define atomic64_add_negative(_i, _v)	(atomic64_add_return(_i, _v) < 0)
-#define atomic64_inc(_v)		atomic64_add_return(1, _v)
-#define atomic64_inc_return(_v)		atomic64_add_return(1, _v)
-#define atomic64_inc_and_test(_v)	(atomic64_add_return(1, _v) == 0)
 
-static __inline__ long long atomic64_sub_return(long long i, atomic64_t * v)
+static inline long long atomic64_sub_return(long long i, atomic64_t *v)
 {
 	return __CSG_LOOP(v, i, "sgr");
 }
-#define atomic64_sub(_i, _v)		atomic64_sub_return(_i, _v)
-#define atomic64_sub_and_test(_i, _v)	(atomic64_sub_return(_i, _v) == 0)
-#define atomic64_dec(_v)		atomic64_sub_return(1, _v)
-#define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
-#define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
 
-static __inline__ void atomic64_clear_mask(unsigned long mask, atomic64_t * v)
+static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
 {
-	       __CSG_LOOP(v, ~mask, "ngr");
+	__CSG_LOOP(v, ~mask, "ngr");
 }
 
-static __inline__ void atomic64_set_mask(unsigned long mask, atomic64_t * v)
+static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
 {
-	       __CSG_LOOP(v, mask, "ogr");
+	__CSG_LOOP(v, mask, "ogr");
 }
 
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ long long atomic64_cmpxchg(atomic64_t *v,
+static inline long long atomic64_cmpxchg(atomic64_t *v,
 					     long long old, long long new)
 {
 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
@@ -249,8 +230,112 @@
 	return old;
 }
 
-static __inline__ int atomic64_add_unless(atomic64_t *v,
-					  long long a, long long u)
+#undef __CSG_LOOP
+
+#else /* CONFIG_64BIT */
+
+typedef struct {
+	long long counter;
+} atomic64_t;
+
+static inline long long atomic64_read(const atomic64_t *v)
+{
+	register_pair rp;
+
+	asm volatile(
+		"	lm	%0,%N0,0(%1)"
+		: "=&d" (rp)
+		: "a" (&v->counter), "m" (v->counter)
+		);
+	return rp.pair;
+}
+
+static inline void atomic64_set(atomic64_t *v, long long i)
+{
+	register_pair rp = {.pair = i};
+
+	asm volatile(
+		"	stm	%1,%N1,0(%2)"
+		: "=m" (v->counter)
+		: "d" (rp), "a" (&v->counter)
+		);
+}
+
+static inline long long atomic64_xchg(atomic64_t *v, long long new)
+{
+	register_pair rp_new = {.pair = new};
+	register_pair rp_old;
+
+	asm volatile(
+		"	lm	%0,%N0,0(%2)\n"
+		"0:	cds	%0,%3,0(%2)\n"
+		"	jl	0b\n"
+		: "=&d" (rp_old), "+m" (v->counter)
+		: "a" (&v->counter), "d" (rp_new)
+		: "cc");
+	return rp_old.pair;
+}
+
+static inline long long atomic64_cmpxchg(atomic64_t *v,
+					 long long old, long long new)
+{
+	register_pair rp_old = {.pair = old};
+	register_pair rp_new = {.pair = new};
+
+	asm volatile(
+		"	cds	%0,%3,0(%2)"
+		: "+&d" (rp_old), "+m" (v->counter)
+		: "a" (&v->counter), "d" (rp_new)
+		: "cc");
+	return rp_old.pair;
+}
+
+
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
+{
+	long long old, new;
+
+	do {
+		old = atomic64_read(v);
+		new = old + i;
+	} while (atomic64_cmpxchg(v, old, new) != old);
+	return new;
+}
+
+static inline long long atomic64_sub_return(long long i, atomic64_t *v)
+{
+	long long old, new;
+
+	do {
+		old = atomic64_read(v);
+		new = old - i;
+	} while (atomic64_cmpxchg(v, old, new) != old);
+	return new;
+}
+
+static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v)
+{
+	long long old, new;
+
+	do {
+		old = atomic64_read(v);
+		new = old | mask;
+	} while (atomic64_cmpxchg(v, old, new) != old);
+}
+
+static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v)
+{
+	long long old, new;
+
+	do {
+		old = atomic64_read(v);
+		new = old & mask;
+	} while (atomic64_cmpxchg(v, old, new) != old);
+}
+
+#endif /* CONFIG_64BIT */
+
+static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	long long c, old;
 	c = atomic64_read(v);
@@ -265,15 +350,17 @@
 	return c != u;
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
-#undef __CSG_LOOP
-
-#else /* __s390x__ */
-
-#include <asm-generic/atomic64.h>
-
-#endif /* __s390x__ */
+#define atomic64_add(_i, _v)		atomic64_add_return(_i, _v)
+#define atomic64_add_negative(_i, _v)	(atomic64_add_return(_i, _v) < 0)
+#define atomic64_inc(_v)		atomic64_add_return(1, _v)
+#define atomic64_inc_return(_v)		atomic64_add_return(1, _v)
+#define atomic64_inc_and_test(_v)	(atomic64_add_return(1, _v) == 0)
+#define atomic64_sub(_i, _v)		atomic64_sub_return(_i, _v)
+#define atomic64_sub_and_test(_i, _v)	(atomic64_sub_return(_i, _v) == 0)
+#define atomic64_dec(_v)		atomic64_sub_return(1, _v)
+#define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
+#define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 
 #define smp_mb__before_atomic_dec()	smp_mb()
 #define smp_mb__after_atomic_dec()	smp_mb()
@@ -281,5 +368,5 @@
 #define smp_mb__after_atomic_inc()	smp_mb()
 
 #include <asm-generic/atomic-long.h>
-#endif /* __KERNEL__ */
+
 #endif /* __ARCH_S390_ATOMIC__  */
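
The new 31-bit fallbacks above all share one compare-and-swap retry
pattern. A portable C11 sketch of that pattern (illustration only, using
<stdatomic.h> in place of the s390 CDS instruction):

	#include <stdatomic.h>
	#include <stdint.h>

	static int64_t add_return64(_Atomic int64_t *v, int64_t i)
	{
		int64_t old = atomic_load(v);

		/* If another CPU modified *v meanwhile, the failed
		 * compare-exchange refreshes 'old' and we retry. */
		while (!atomic_compare_exchange_weak(v, &old, old + i))
			;
		return old + i;
	}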
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index d5a8e7c..6c00f68 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -78,28 +78,11 @@
  */
 static inline __sum16 csum_fold(__wsum sum)
 {
-#ifndef __s390x__
-	register_pair rp;
+	u32 csum = (__force u32) sum;
 
-	asm volatile(
-		"	slr	%N1,%N1\n"	/* %0 = H L */
-		"	lr	%1,%0\n"	/* %0 = H L, %1 = H L 0 0 */
-		"	srdl	%1,16\n"	/* %0 = H L, %1 = 0 H L 0 */
-		"	alr	%1,%N1\n"	/* %0 = H L, %1 = L H L 0 */
-		"	alr	%0,%1\n"	/* %0 = H+L+C L+H */
-		"	srl	%0,16\n"	/* %0 = H+L+C */
-		: "+&d" (sum), "=d" (rp) : : "cc");
-#else /* __s390x__ */
-	asm volatile(
-		"	sr	3,3\n"		/* %0 = H*65536 + L */
-		"	lr	2,%0\n"		/* %0 = H L, 2/3 = H L / 0 0 */
-		"	srdl	2,16\n"		/* %0 = H L, 2/3 = 0 H / L 0 */
-		"	alr	2,3\n"		/* %0 = H L, 2/3 = L H / L 0 */
-		"	alr	%0,2\n"		/* %0 = H+L+C L+H */
-		"	srl	%0,16\n"	/* %0 = H+L+C */
-		: "+&d" (sum) : : "cc", "2", "3");
-#endif /* __s390x__ */
-	return (__force __sum16) ~sum;
+	csum += (csum >> 16) + (csum << 16);
+	csum >>= 16;
+	return (__force __sum16) ~csum;
 }
 
 /*
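
The rewritten csum_fold() is plain end-around-carry folding. A
stand-alone sketch of the same arithmetic on a sample value (generic C,
not s390-specific):

	#include <stdint.h>
	#include <stdio.h>

	static uint16_t fold(uint32_t csum)
	{
		/* add the two 16-bit halves; any carry out of the low
		 * half propagates into the high half */
		csum += (csum >> 16) + (csum << 16);
		csum >>= 16;		/* keep the folded high half */
		return (uint16_t)~csum;
	}

	int main(void)
	{
		printf("0x%04x\n", fold(0x12345678));	/* 0x9753 */
		return 0;
	}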
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
index 807997f..4943654 100644
--- a/arch/s390/include/asm/chsc.h
+++ b/arch/s390/include/asm/chsc.h
@@ -125,4 +125,32 @@
 #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
 #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
 
+#ifdef __KERNEL__
+
+struct css_general_char {
+	u64 : 12;
+	u32 dynio : 1;	 /* bit 12 */
+	u32 : 28;
+	u32 aif : 1;	 /* bit 41 */
+	u32 : 3;
+	u32 mcss : 1;	 /* bit 45 */
+	u32 fcs : 1;	 /* bit 46 */
+	u32 : 1;
+	u32 ext_mb : 1;  /* bit 48 */
+	u32 : 7;
+	u32 aif_tdd : 1; /* bit 56 */
+	u32 : 1;
+	u32 qebsm : 1;	 /* bit 58 */
+	u32 : 8;
+	u32 aif_osa : 1; /* bit 67 */
+	u32 : 14;
+	u32 cib : 1;	 /* bit 82 */
+	u32 : 5;
+	u32 fcx : 1;	 /* bit 88 */
+	u32 : 7;
+} __attribute__((packed));
+
+extern struct css_general_char css_general_characteristics;
+
+#endif /* __KERNEL__ */
 #endif
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 619bf94..e85679a 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -15,228 +15,7 @@
 #define LPM_ANYPATH 0xff
 #define __MAX_CSSID 0
 
-/**
- * struct cmd_scsw - command-mode subchannel status word
- * @key: subchannel key
- * @sctl: suspend control
- * @eswf: esw format
- * @cc: deferred condition code
- * @fmt: format
- * @pfch: prefetch
- * @isic: initial-status interruption control
- * @alcc: address-limit checking control
- * @ssi: suppress-suspended interruption
- * @zcc: zero condition code
- * @ectl: extended control
- * @pno: path not operational
- * @res: reserved
- * @fctl: function control
- * @actl: activity control
- * @stctl: status control
- * @cpa: channel program address
- * @dstat: device status
- * @cstat: subchannel status
- * @count: residual count
- */
-struct cmd_scsw {
-	__u32 key  : 4;
-	__u32 sctl : 1;
-	__u32 eswf : 1;
-	__u32 cc   : 2;
-	__u32 fmt  : 1;
-	__u32 pfch : 1;
-	__u32 isic : 1;
-	__u32 alcc : 1;
-	__u32 ssi  : 1;
-	__u32 zcc  : 1;
-	__u32 ectl : 1;
-	__u32 pno  : 1;
-	__u32 res  : 1;
-	__u32 fctl : 3;
-	__u32 actl : 7;
-	__u32 stctl : 5;
-	__u32 cpa;
-	__u32 dstat : 8;
-	__u32 cstat : 8;
-	__u32 count : 16;
-} __attribute__ ((packed));
-
-/**
- * struct tm_scsw - transport-mode subchannel status word
- * @key: subchannel key
- * @eswf: esw format
- * @cc: deferred condition code
- * @fmt: format
- * @x: IRB-format control
- * @q: interrogate-complete
- * @ectl: extended control
- * @pno: path not operational
- * @fctl: function control
- * @actl: activity control
- * @stctl: status control
- * @tcw: TCW address
- * @dstat: device status
- * @cstat: subchannel status
- * @fcxs: FCX status
- * @schxs: subchannel-extended status
- */
-struct tm_scsw {
-	u32 key:4;
-	u32 :1;
-	u32 eswf:1;
-	u32 cc:2;
-	u32 fmt:3;
-	u32 x:1;
-	u32 q:1;
-	u32 :1;
-	u32 ectl:1;
-	u32 pno:1;
-	u32 :1;
-	u32 fctl:3;
-	u32 actl:7;
-	u32 stctl:5;
-	u32 tcw;
-	u32 dstat:8;
-	u32 cstat:8;
-	u32 fcxs:8;
-	u32 schxs:8;
-} __attribute__ ((packed));
-
-/**
- * union scsw - subchannel status word
- * @cmd: command-mode SCSW
- * @tm: transport-mode SCSW
- */
-union scsw {
-	struct cmd_scsw cmd;
-	struct tm_scsw tm;
-} __attribute__ ((packed));
-
-int scsw_is_tm(union scsw *scsw);
-u32 scsw_key(union scsw *scsw);
-u32 scsw_eswf(union scsw *scsw);
-u32 scsw_cc(union scsw *scsw);
-u32 scsw_ectl(union scsw *scsw);
-u32 scsw_pno(union scsw *scsw);
-u32 scsw_fctl(union scsw *scsw);
-u32 scsw_actl(union scsw *scsw);
-u32 scsw_stctl(union scsw *scsw);
-u32 scsw_dstat(union scsw *scsw);
-u32 scsw_cstat(union scsw *scsw);
-int scsw_is_solicited(union scsw *scsw);
-int scsw_is_valid_key(union scsw *scsw);
-int scsw_is_valid_eswf(union scsw *scsw);
-int scsw_is_valid_cc(union scsw *scsw);
-int scsw_is_valid_ectl(union scsw *scsw);
-int scsw_is_valid_pno(union scsw *scsw);
-int scsw_is_valid_fctl(union scsw *scsw);
-int scsw_is_valid_actl(union scsw *scsw);
-int scsw_is_valid_stctl(union scsw *scsw);
-int scsw_is_valid_dstat(union scsw *scsw);
-int scsw_is_valid_cstat(union scsw *scsw);
-int scsw_cmd_is_valid_key(union scsw *scsw);
-int scsw_cmd_is_valid_sctl(union scsw *scsw);
-int scsw_cmd_is_valid_eswf(union scsw *scsw);
-int scsw_cmd_is_valid_cc(union scsw *scsw);
-int scsw_cmd_is_valid_fmt(union scsw *scsw);
-int scsw_cmd_is_valid_pfch(union scsw *scsw);
-int scsw_cmd_is_valid_isic(union scsw *scsw);
-int scsw_cmd_is_valid_alcc(union scsw *scsw);
-int scsw_cmd_is_valid_ssi(union scsw *scsw);
-int scsw_cmd_is_valid_zcc(union scsw *scsw);
-int scsw_cmd_is_valid_ectl(union scsw *scsw);
-int scsw_cmd_is_valid_pno(union scsw *scsw);
-int scsw_cmd_is_valid_fctl(union scsw *scsw);
-int scsw_cmd_is_valid_actl(union scsw *scsw);
-int scsw_cmd_is_valid_stctl(union scsw *scsw);
-int scsw_cmd_is_valid_dstat(union scsw *scsw);
-int scsw_cmd_is_valid_cstat(union scsw *scsw);
-int scsw_cmd_is_solicited(union scsw *scsw);
-int scsw_tm_is_valid_key(union scsw *scsw);
-int scsw_tm_is_valid_eswf(union scsw *scsw);
-int scsw_tm_is_valid_cc(union scsw *scsw);
-int scsw_tm_is_valid_fmt(union scsw *scsw);
-int scsw_tm_is_valid_x(union scsw *scsw);
-int scsw_tm_is_valid_q(union scsw *scsw);
-int scsw_tm_is_valid_ectl(union scsw *scsw);
-int scsw_tm_is_valid_pno(union scsw *scsw);
-int scsw_tm_is_valid_fctl(union scsw *scsw);
-int scsw_tm_is_valid_actl(union scsw *scsw);
-int scsw_tm_is_valid_stctl(union scsw *scsw);
-int scsw_tm_is_valid_dstat(union scsw *scsw);
-int scsw_tm_is_valid_cstat(union scsw *scsw);
-int scsw_tm_is_valid_fcxs(union scsw *scsw);
-int scsw_tm_is_valid_schxs(union scsw *scsw);
-int scsw_tm_is_solicited(union scsw *scsw);
-
-#define SCSW_FCTL_CLEAR_FUNC	 0x1
-#define SCSW_FCTL_HALT_FUNC	 0x2
-#define SCSW_FCTL_START_FUNC	 0x4
-
-#define SCSW_ACTL_SUSPENDED	 0x1
-#define SCSW_ACTL_DEVACT	 0x2
-#define SCSW_ACTL_SCHACT	 0x4
-#define SCSW_ACTL_CLEAR_PEND	 0x8
-#define SCSW_ACTL_HALT_PEND	 0x10
-#define SCSW_ACTL_START_PEND	 0x20
-#define SCSW_ACTL_RESUME_PEND	 0x40
-
-#define SCSW_STCTL_STATUS_PEND	 0x1
-#define SCSW_STCTL_SEC_STATUS	 0x2
-#define SCSW_STCTL_PRIM_STATUS	 0x4
-#define SCSW_STCTL_INTER_STATUS	 0x8
-#define SCSW_STCTL_ALERT_STATUS	 0x10
-
-#define DEV_STAT_ATTENTION	 0x80
-#define DEV_STAT_STAT_MOD	 0x40
-#define DEV_STAT_CU_END		 0x20
-#define DEV_STAT_BUSY		 0x10
-#define DEV_STAT_CHN_END	 0x08
-#define DEV_STAT_DEV_END	 0x04
-#define DEV_STAT_UNIT_CHECK	 0x02
-#define DEV_STAT_UNIT_EXCEP	 0x01
-
-#define SCHN_STAT_PCI		 0x80
-#define SCHN_STAT_INCORR_LEN	 0x40
-#define SCHN_STAT_PROG_CHECK	 0x20
-#define SCHN_STAT_PROT_CHECK	 0x10
-#define SCHN_STAT_CHN_DATA_CHK	 0x08
-#define SCHN_STAT_CHN_CTRL_CHK	 0x04
-#define SCHN_STAT_INTF_CTRL_CHK	 0x02
-#define SCHN_STAT_CHAIN_CHECK	 0x01
-
-/*
- * architectured values for first sense byte
- */
-#define SNS0_CMD_REJECT		0x80
-#define SNS_CMD_REJECT		SNS0_CMD_REJEC
-#define SNS0_INTERVENTION_REQ	0x40
-#define SNS0_BUS_OUT_CHECK	0x20
-#define SNS0_EQUIPMENT_CHECK	0x10
-#define SNS0_DATA_CHECK		0x08
-#define SNS0_OVERRUN		0x04
-#define SNS0_INCOMPL_DOMAIN	0x01
-
-/*
- * architectured values for second sense byte
- */
-#define SNS1_PERM_ERR		0x80
-#define SNS1_INV_TRACK_FORMAT	0x40
-#define SNS1_EOC		0x20
-#define SNS1_MESSAGE_TO_OPER	0x10
-#define SNS1_NO_REC_FOUND	0x08
-#define SNS1_FILE_PROTECTED	0x04
-#define SNS1_WRITE_INHIBITED	0x02
-#define SNS1_INPRECISE_END	0x01
-
-/*
- * architectured values for third sense byte
- */
-#define SNS2_REQ_INH_WRITE	0x80
-#define SNS2_CORRECTABLE	0x40
-#define SNS2_FIRST_LOG_ERR	0x20
-#define SNS2_ENV_DATA_PRESENT	0x10
-#define SNS2_INPRECISE_END	0x04
+#include <asm/scsw.h>
 
 /**
  * struct ccw1 - channel command word
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
new file mode 100644
index 0000000..471234b
--- /dev/null
+++ b/arch/s390/include/asm/cpu.h
@@ -0,0 +1,26 @@
+/*
+ *    Copyright IBM Corp. 2000,2009
+ *    Author(s): Hartmut Penner <hp@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Christian Ehrhardt <ehrhardt@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CPU_H
+#define _ASM_S390_CPU_H
+
+#define MAX_CPU_ADDRESS 255
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct cpuid
+{
+	unsigned int version :	8;
+	unsigned int ident   : 24;
+	unsigned int machine : 16;
+	unsigned int unused  : 16;
+} __packed;
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpuid.h b/arch/s390/include/asm/cpuid.h
deleted file mode 100644
index 07836a2..0000000
--- a/arch/s390/include/asm/cpuid.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- *    Copyright IBM Corp. 2000,2009
- *    Author(s): Hartmut Penner <hp@de.ibm.com>,
- *		 Martin Schwidefsky <schwidefsky@de.ibm.com>
- *		 Christian Ehrhardt <ehrhardt@de.ibm.com>
- */
-
-#ifndef _ASM_S390_CPUID_H_
-#define _ASM_S390_CPUID_H_
-
-/*
- *  CPU type and hardware bug flags. Kept separately for each CPU.
- *  Members of this structure are referenced in head.S, so think twice
- *  before touching them. [mj]
- */
-
-typedef struct
-{
-	unsigned int version :	8;
-	unsigned int ident   : 24;
-	unsigned int machine : 16;
-	unsigned int unused  : 16;
-} __attribute__ ((packed)) cpuid_t;
-
-#endif /* _ASM_S390_CPUID_H_ */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 31ed568..18124b7 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -167,6 +167,10 @@
         return debug_event_common(id,level,txt,strlen(txt));
 }
 
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ */
 extern debug_entry_t *
 debug_sprintf_event(debug_info_t* id,int level,char *string,...)
 	__attribute__ ((format(printf, 3, 4)));
@@ -206,7 +210,10 @@
         return debug_exception_common(id,level,txt,strlen(txt));
 }
 
-
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ */
 extern debug_entry_t *
 debug_sprintf_exception(debug_info_t* id,int level,char *string,...)
 	__attribute__ ((format(printf, 3, 4)));
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 89ec705..498bc38 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -18,13 +18,6 @@
 #include <linux/interrupt.h>
 #include <asm/lowcore.h>
 
-/* irq_cpustat_t is unused currently, but could be converted
- * into a percpu variable instead of storing softirq_pending
- * on the lowcore */
-typedef struct {
-	unsigned int __softirq_pending;
-} irq_cpustat_t;
-
 #define local_softirq_pending() (S390_lowcore.softirq_pending)
 
 #define __ARCH_IRQ_STAT
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 1171e6d..5e95d95 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -57,6 +57,8 @@
 } __attribute__((packed));
 
 #define DIAG308_VMPARM_SIZE	64
+#define DIAG308_SCPDATA_SIZE	(PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \
+				 offsetof(struct ipl_block_fcp, scp_data)))
 
 struct ipl_block_ccw {
 	u8  load_parm[8];
@@ -91,7 +93,8 @@
 extern void do_poff(void);
 extern void ipl_save_parameters(void);
 extern void ipl_update_parameters(void);
-extern void get_ipl_vmparm(char *);
+extern size_t append_ipl_vmparm(char *, size_t);
+extern size_t append_ipl_scpdata(char *, size_t);
 
 enum {
 	IPL_DEVNO_VALID		= 1,
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1cd02f6..698988f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -17,7 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <asm/debug.h>
-#include <asm/cpuid.h>
+#include <asm/cpu.h>
 
 #define KVM_MAX_VCPUS 64
 #define KVM_MEMORY_SLOTS 32
@@ -217,8 +217,8 @@
 	struct hrtimer    ckc_timer;
 	struct tasklet_struct tasklet;
 	union  {
-		cpuid_t	  cpu_id;
-		u64	  stidp_data;
+		struct cpuid	cpu_id;
+		u64		stidp_data;
 	};
 };
 
diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h
index 0503936..acdfdff 100644
--- a/arch/s390/include/asm/kvm_virtio.h
+++ b/arch/s390/include/asm/kvm_virtio.h
@@ -54,14 +54,4 @@
  * This is pagesize for historical reasons. */
 #define KVM_S390_VIRTIO_RING_ALIGN	4096
 
-#ifdef __KERNEL__
-/* early virtio console setup */
-#ifdef CONFIG_S390_GUEST
-extern void s390_virtio_console_init(void);
-#else
-static inline void s390_virtio_console_init(void)
-{
-}
-#endif /* CONFIG_VIRTIO_CONSOLE */
-#endif /* __KERNEL__ */
 #endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 5046ad6..6bc9426 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -132,7 +132,7 @@
 
 #ifndef __ASSEMBLY__
 
-#include <asm/cpuid.h>
+#include <asm/cpu.h>
 #include <asm/ptrace.h>
 #include <linux/types.h>
 
@@ -275,7 +275,7 @@
 	__u32	user_exec_asce;			/* 0x02ac */
 
 	/* SMP info area */
-	cpuid_t	cpu_id;				/* 0x02b0 */
+	struct cpuid cpu_id;			/* 0x02b0 */
 	__u32	cpu_nr;				/* 0x02b8 */
 	__u32	softirq_pending;		/* 0x02bc */
 	__u32	percpu_offset;			/* 0x02c0 */
@@ -380,7 +380,7 @@
 	__u64	user_exec_asce;			/* 0x0318 */
 
 	/* SMP info area */
-	cpuid_t	cpu_id;				/* 0x0320 */
+	struct cpuid cpu_id;			/* 0x0320 */
 	__u32	cpu_nr;				/* 0x0328 */
 	__u32	softirq_pending;		/* 0x032c */
 	__u64	percpu_offset;			/* 0x0330 */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 3b59216..03be999 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -2,6 +2,7 @@
 #define __MMU_H
 
 typedef struct {
+	spinlock_t list_lock;
 	struct list_head crst_list;
 	struct list_head pgtable_list;
 	unsigned long asce_bits;
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 3e3594d..5e9daf5 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -125,8 +125,6 @@
 	return skey;
 }
 
-#ifdef CONFIG_PAGE_STATES
-
 struct page;
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
@@ -134,8 +132,6 @@
 #define HAVE_ARCH_FREE_PAGE
 #define HAVE_ARCH_ALLOC_PAGE
 
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #define __PAGE_OFFSET           0x0UL
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index b2658b9..ddad590 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -140,6 +140,7 @@
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+	spin_lock_init(&mm->context.list_lock);
 	INIT_LIST_HEAD(&mm->context.crst_list);
 	INIT_LIST_HEAD(&mm->context.pgtable_list);
 	return (pgd_t *) crst_table_alloc(mm, s390_noexec);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index c139fa7..cf8eed3 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,7 +14,7 @@
 #define __ASM_S390_PROCESSOR_H
 
 #include <linux/linkage.h>
-#include <asm/cpuid.h>
+#include <asm/cpu.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
 #include <asm/setup.h>
@@ -26,7 +26,7 @@
  */
 #define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
 
-static inline void get_cpu_id(cpuid_t *ptr)
+static inline void get_cpu_id(struct cpuid *ptr)
 {
 	asm volatile("stidp 0(%1)" : "=m" (*ptr) : "a" (ptr));
 }
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
index 29ec8e2..35d786f 100644
--- a/arch/s390/include/asm/scatterlist.h
+++ b/arch/s390/include/asm/scatterlist.h
@@ -1,19 +1 @@
-#ifndef _ASMS390_SCATTERLIST_H
-#define _ASMS390_SCATTERLIST_H
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-    unsigned long sg_magic;
-#endif
-    unsigned long page_link;
-    unsigned int offset;
-    unsigned int length;
-};
-
-#ifdef __s390x__
-#define ISA_DMA_THRESHOLD (0xffffffffffffffffUL)
-#else
-#define ISA_DMA_THRESHOLD (0xffffffffUL)
-#endif
-
-#endif /* _ASMS390X_SCATTERLIST_H */
+#include <asm-generic/scatterlist.h>
diff --git a/drivers/s390/cio/scsw.c b/arch/s390/include/asm/scsw.h
similarity index 69%
rename from drivers/s390/cio/scsw.c
rename to arch/s390/include/asm/scsw.h
index f8da25a..de389cb 100644
--- a/drivers/s390/cio/scsw.c
+++ b/arch/s390/include/asm/scsw.h
@@ -1,15 +1,182 @@
 /*
  *  Helper functions for scsw access.
  *
- *    Copyright IBM Corp. 2008
+ *    Copyright IBM Corp. 2008,2009
  *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
  */
 
+#ifndef _ASM_S390_SCSW_H_
+#define _ASM_S390_SCSW_H_
+
 #include <linux/types.h>
-#include <linux/module.h>
+#include <asm/chsc.h>
 #include <asm/cio.h>
-#include "css.h"
-#include "chsc.h"
+
+/**
+ * struct cmd_scsw - command-mode subchannel status word
+ * @key: subchannel key
+ * @sctl: suspend control
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @pfch: prefetch
+ * @isic: initial-status interruption control
+ * @alcc: address-limit checking control
+ * @ssi: suppress-suspended interruption
+ * @zcc: zero condition code
+ * @ectl: extended control
+ * @pno: path not operational
+ * @res: reserved
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @cpa: channel program address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @count: residual count
+ */
+struct cmd_scsw {
+	__u32 key  : 4;
+	__u32 sctl : 1;
+	__u32 eswf : 1;
+	__u32 cc   : 2;
+	__u32 fmt  : 1;
+	__u32 pfch : 1;
+	__u32 isic : 1;
+	__u32 alcc : 1;
+	__u32 ssi  : 1;
+	__u32 zcc  : 1;
+	__u32 ectl : 1;
+	__u32 pno  : 1;
+	__u32 res  : 1;
+	__u32 fctl : 3;
+	__u32 actl : 7;
+	__u32 stctl : 5;
+	__u32 cpa;
+	__u32 dstat : 8;
+	__u32 cstat : 8;
+	__u32 count : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct tm_scsw - transport-mode subchannel status word
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @x: IRB-format control
+ * @q: interrogate-complete
+ * @ectl: extended control
+ * @pno: path not operational
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @tcw: TCW address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @fcxs: FCX status
+ * @schxs: subchannel-extended status
+ */
+struct tm_scsw {
+	u32 key:4;
+	u32 :1;
+	u32 eswf:1;
+	u32 cc:2;
+	u32 fmt:3;
+	u32 x:1;
+	u32 q:1;
+	u32 :1;
+	u32 ectl:1;
+	u32 pno:1;
+	u32 :1;
+	u32 fctl:3;
+	u32 actl:7;
+	u32 stctl:5;
+	u32 tcw;
+	u32 dstat:8;
+	u32 cstat:8;
+	u32 fcxs:8;
+	u32 schxs:8;
+} __attribute__ ((packed));
+
+/**
+ * union scsw - subchannel status word
+ * @cmd: command-mode SCSW
+ * @tm: transport-mode SCSW
+ */
+union scsw {
+	struct cmd_scsw cmd;
+	struct tm_scsw tm;
+} __attribute__ ((packed));
+
+#define SCSW_FCTL_CLEAR_FUNC	 0x1
+#define SCSW_FCTL_HALT_FUNC	 0x2
+#define SCSW_FCTL_START_FUNC	 0x4
+
+#define SCSW_ACTL_SUSPENDED	 0x1
+#define SCSW_ACTL_DEVACT	 0x2
+#define SCSW_ACTL_SCHACT	 0x4
+#define SCSW_ACTL_CLEAR_PEND	 0x8
+#define SCSW_ACTL_HALT_PEND	 0x10
+#define SCSW_ACTL_START_PEND	 0x20
+#define SCSW_ACTL_RESUME_PEND	 0x40
+
+#define SCSW_STCTL_STATUS_PEND	 0x1
+#define SCSW_STCTL_SEC_STATUS	 0x2
+#define SCSW_STCTL_PRIM_STATUS	 0x4
+#define SCSW_STCTL_INTER_STATUS	 0x8
+#define SCSW_STCTL_ALERT_STATUS	 0x10
+
+#define DEV_STAT_ATTENTION	 0x80
+#define DEV_STAT_STAT_MOD	 0x40
+#define DEV_STAT_CU_END		 0x20
+#define DEV_STAT_BUSY		 0x10
+#define DEV_STAT_CHN_END	 0x08
+#define DEV_STAT_DEV_END	 0x04
+#define DEV_STAT_UNIT_CHECK	 0x02
+#define DEV_STAT_UNIT_EXCEP	 0x01
+
+#define SCHN_STAT_PCI		 0x80
+#define SCHN_STAT_INCORR_LEN	 0x40
+#define SCHN_STAT_PROG_CHECK	 0x20
+#define SCHN_STAT_PROT_CHECK	 0x10
+#define SCHN_STAT_CHN_DATA_CHK	 0x08
+#define SCHN_STAT_CHN_CTRL_CHK	 0x04
+#define SCHN_STAT_INTF_CTRL_CHK	 0x02
+#define SCHN_STAT_CHAIN_CHECK	 0x01
+
+/*
+ * architectured values for first sense byte
+ */
+#define SNS0_CMD_REJECT		0x80
+#define SNS_CMD_REJECT		SNS0_CMD_REJECT
+#define SNS0_INTERVENTION_REQ	0x40
+#define SNS0_BUS_OUT_CHECK	0x20
+#define SNS0_EQUIPMENT_CHECK	0x10
+#define SNS0_DATA_CHECK		0x08
+#define SNS0_OVERRUN		0x04
+#define SNS0_INCOMPL_DOMAIN	0x01
+
+/*
+ * architectured values for second sense byte
+ */
+#define SNS1_PERM_ERR		0x80
+#define SNS1_INV_TRACK_FORMAT	0x40
+#define SNS1_EOC		0x20
+#define SNS1_MESSAGE_TO_OPER	0x10
+#define SNS1_NO_REC_FOUND	0x08
+#define SNS1_FILE_PROTECTED	0x04
+#define SNS1_WRITE_INHIBITED	0x02
+#define SNS1_INPRECISE_END	0x01
+
+/*
+ * architectured values for third sense byte
+ */
+#define SNS2_REQ_INH_WRITE	0x80
+#define SNS2_CORRECTABLE	0x40
+#define SNS2_FIRST_LOG_ERR	0x20
+#define SNS2_ENV_DATA_PRESENT	0x10
+#define SNS2_INPRECISE_END	0x04
 
 /**
  * scsw_is_tm - check for transport mode scsw
@@ -18,11 +185,10 @@
  * Return non-zero if the specified scsw is a transport mode scsw, zero
  * otherwise.
  */
-int scsw_is_tm(union scsw *scsw)
+static inline int scsw_is_tm(union scsw *scsw)
 {
 	return css_general_characteristics.fcx && (scsw->tm.x == 1);
 }
-EXPORT_SYMBOL(scsw_is_tm);
 
 /**
  * scsw_key - return scsw key field
@@ -31,14 +197,13 @@
  * Return the value of the key field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_key(union scsw *scsw)
+static inline u32 scsw_key(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.key;
 	else
 		return scsw->cmd.key;
 }
-EXPORT_SYMBOL(scsw_key);
 
 /**
  * scsw_eswf - return scsw eswf field
@@ -47,14 +212,13 @@
  * Return the value of the eswf field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_eswf(union scsw *scsw)
+static inline u32 scsw_eswf(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.eswf;
 	else
 		return scsw->cmd.eswf;
 }
-EXPORT_SYMBOL(scsw_eswf);
 
 /**
  * scsw_cc - return scsw cc field
@@ -63,14 +227,13 @@
  * Return the value of the cc field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_cc(union scsw *scsw)
+static inline u32 scsw_cc(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.cc;
 	else
 		return scsw->cmd.cc;
 }
-EXPORT_SYMBOL(scsw_cc);
 
 /**
  * scsw_ectl - return scsw ectl field
@@ -79,14 +242,13 @@
  * Return the value of the ectl field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_ectl(union scsw *scsw)
+static inline u32 scsw_ectl(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.ectl;
 	else
 		return scsw->cmd.ectl;
 }
-EXPORT_SYMBOL(scsw_ectl);
 
 /**
  * scsw_pno - return scsw pno field
@@ -95,14 +257,13 @@
  * Return the value of the pno field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_pno(union scsw *scsw)
+static inline u32 scsw_pno(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.pno;
 	else
 		return scsw->cmd.pno;
 }
-EXPORT_SYMBOL(scsw_pno);
 
 /**
  * scsw_fctl - return scsw fctl field
@@ -111,14 +272,13 @@
  * Return the value of the fctl field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_fctl(union scsw *scsw)
+static inline u32 scsw_fctl(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.fctl;
 	else
 		return scsw->cmd.fctl;
 }
-EXPORT_SYMBOL(scsw_fctl);
 
 /**
  * scsw_actl - return scsw actl field
@@ -127,14 +287,13 @@
  * Return the value of the actl field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_actl(union scsw *scsw)
+static inline u32 scsw_actl(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.actl;
 	else
 		return scsw->cmd.actl;
 }
-EXPORT_SYMBOL(scsw_actl);
 
 /**
  * scsw_stctl - return scsw stctl field
@@ -143,14 +302,13 @@
  * Return the value of the stctl field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_stctl(union scsw *scsw)
+static inline u32 scsw_stctl(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.stctl;
 	else
 		return scsw->cmd.stctl;
 }
-EXPORT_SYMBOL(scsw_stctl);
 
 /**
  * scsw_dstat - return scsw dstat field
@@ -159,14 +317,13 @@
  * Return the value of the dstat field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_dstat(union scsw *scsw)
+static inline u32 scsw_dstat(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.dstat;
 	else
 		return scsw->cmd.dstat;
 }
-EXPORT_SYMBOL(scsw_dstat);
 
 /**
  * scsw_cstat - return scsw cstat field
@@ -175,14 +332,13 @@
  * Return the value of the cstat field of the specified scsw, regardless of
  * whether it is a transport mode or command mode scsw.
  */
-u32 scsw_cstat(union scsw *scsw)
+static inline u32 scsw_cstat(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw->tm.cstat;
 	else
 		return scsw->cmd.cstat;
 }
-EXPORT_SYMBOL(scsw_cstat);
 
 /**
  * scsw_cmd_is_valid_key - check key field validity
@@ -191,11 +347,10 @@
  * Return non-zero if the key field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_key(union scsw *scsw)
+static inline int scsw_cmd_is_valid_key(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_key);
 
 /**
  * scsw_cmd_is_valid_sctl - check sctl field validity
@@ -204,11 +359,10 @@
  * Return non-zero if the sctl field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_sctl(union scsw *scsw)
+static inline int scsw_cmd_is_valid_sctl(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_sctl);
 
 /**
  * scsw_cmd_is_valid_eswf - check eswf field validity
@@ -217,11 +371,10 @@
  * Return non-zero if the eswf field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_eswf(union scsw *scsw)
+static inline int scsw_cmd_is_valid_eswf(union scsw *scsw)
 {
 	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_eswf);
 
 /**
  * scsw_cmd_is_valid_cc - check cc field validity
@@ -230,12 +383,11 @@
  * Return non-zero if the cc field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_cc(union scsw *scsw)
+static inline int scsw_cmd_is_valid_cc(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
 	       (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_cc);
 
 /**
  * scsw_cmd_is_valid_fmt - check fmt field validity
@@ -244,11 +396,10 @@
  * Return non-zero if the fmt field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_fmt(union scsw *scsw)
+static inline int scsw_cmd_is_valid_fmt(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_fmt);
 
 /**
  * scsw_cmd_is_valid_pfch - check pfch field validity
@@ -257,11 +408,10 @@
  * Return non-zero if the pfch field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_pfch(union scsw *scsw)
+static inline int scsw_cmd_is_valid_pfch(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_pfch);
 
 /**
  * scsw_cmd_is_valid_isic - check isic field validity
@@ -270,11 +420,10 @@
  * Return non-zero if the isic field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_isic(union scsw *scsw)
+static inline int scsw_cmd_is_valid_isic(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_isic);
 
 /**
  * scsw_cmd_is_valid_alcc - check alcc field validity
@@ -283,11 +432,10 @@
  * Return non-zero if the alcc field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_alcc(union scsw *scsw)
+static inline int scsw_cmd_is_valid_alcc(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_alcc);
 
 /**
  * scsw_cmd_is_valid_ssi - check ssi field validity
@@ -296,11 +444,10 @@
  * Return non-zero if the ssi field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_ssi(union scsw *scsw)
+static inline int scsw_cmd_is_valid_ssi(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_ssi);
 
 /**
  * scsw_cmd_is_valid_zcc - check zcc field validity
@@ -309,12 +456,11 @@
  * Return non-zero if the zcc field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_zcc(union scsw *scsw)
+static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
 {
 	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
 	       (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_zcc);
 
 /**
  * scsw_cmd_is_valid_ectl - check ectl field validity
@@ -323,13 +469,12 @@
  * Return non-zero if the ectl field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_ectl(union scsw *scsw)
+static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
 {
 	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
 	       !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
 	       (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_ectl);
 
 /**
  * scsw_cmd_is_valid_pno - check pno field validity
@@ -338,7 +483,7 @@
  * Return non-zero if the pno field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_pno(union scsw *scsw)
+static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
 {
 	return (scsw->cmd.fctl != 0) &&
 	       (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
@@ -346,7 +491,6 @@
 		 ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
 		  (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)));
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_pno);
 
 /**
  * scsw_cmd_is_valid_fctl - check fctl field validity
@@ -355,12 +499,11 @@
  * Return non-zero if the fctl field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_fctl(union scsw *scsw)
+static inline int scsw_cmd_is_valid_fctl(union scsw *scsw)
 {
 	/* Only valid if pmcw.dnv == 1*/
 	return 1;
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_fctl);
 
 /**
  * scsw_cmd_is_valid_actl - check actl field validity
@@ -369,12 +512,11 @@
  * Return non-zero if the actl field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_actl(union scsw *scsw)
+static inline int scsw_cmd_is_valid_actl(union scsw *scsw)
 {
 	/* Only valid if pmcw.dnv == 1*/
 	return 1;
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_actl);
 
 /**
  * scsw_cmd_is_valid_stctl - check stctl field validity
@@ -383,12 +525,11 @@
  * Return non-zero if the stctl field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_stctl(union scsw *scsw)
+static inline int scsw_cmd_is_valid_stctl(union scsw *scsw)
 {
 	/* Only valid if pmcw.dnv == 1*/
 	return 1;
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_stctl);
 
 /**
  * scsw_cmd_is_valid_dstat - check dstat field validity
@@ -397,12 +538,11 @@
  * Return non-zero if the dstat field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_dstat(union scsw *scsw)
+static inline int scsw_cmd_is_valid_dstat(union scsw *scsw)
 {
 	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
 	       (scsw->cmd.cc != 3);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_dstat);
 
 /**
  * scsw_cmd_is_valid_cstat - check cstat field validity
@@ -411,12 +551,11 @@
  * Return non-zero if the cstat field of the specified command mode scsw is
  * valid, zero otherwise.
  */
-int scsw_cmd_is_valid_cstat(union scsw *scsw)
+static inline int scsw_cmd_is_valid_cstat(union scsw *scsw)
 {
 	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
 	       (scsw->cmd.cc != 3);
 }
-EXPORT_SYMBOL(scsw_cmd_is_valid_cstat);
 
 /**
  * scsw_tm_is_valid_key - check key field validity
@@ -425,11 +564,10 @@
  * Return non-zero if the key field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_key(union scsw *scsw)
+static inline int scsw_tm_is_valid_key(union scsw *scsw)
 {
 	return (scsw->tm.fctl & SCSW_FCTL_START_FUNC);
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_key);
 
 /**
  * scsw_tm_is_valid_eswf - check eswf field validity
@@ -438,11 +576,10 @@
  * Return non-zero if the eswf field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_eswf(union scsw *scsw)
+static inline int scsw_tm_is_valid_eswf(union scsw *scsw)
 {
 	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_eswf);
 
 /**
  * scsw_tm_is_valid_cc - check cc field validity
@@ -451,12 +588,11 @@
  * Return non-zero if the cc field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_cc(union scsw *scsw)
+static inline int scsw_tm_is_valid_cc(union scsw *scsw)
 {
 	return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) &&
 	       (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_cc);
 
 /**
  * scsw_tm_is_valid_fmt - check fmt field validity
@@ -465,11 +601,10 @@
  * Return non-zero if the fmt field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_fmt(union scsw *scsw)
+static inline int scsw_tm_is_valid_fmt(union scsw *scsw)
 {
 	return 1;
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_fmt);
 
 /**
  * scsw_tm_is_valid_x - check x field validity
@@ -478,11 +613,10 @@
  * Return non-zero if the x field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_x(union scsw *scsw)
+static inline int scsw_tm_is_valid_x(union scsw *scsw)
 {
 	return 1;
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_x);
 
 /**
  * scsw_tm_is_valid_q - check q field validity
@@ -491,11 +625,10 @@
  * Return non-zero if the q field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_q(union scsw *scsw)
+static inline int scsw_tm_is_valid_q(union scsw *scsw)
 {
 	return 1;
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_q);
 
 /**
  * scsw_tm_is_valid_ectl - check ectl field validity
@@ -504,13 +637,12 @@
  * Return non-zero if the ectl field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_ectl(union scsw *scsw)
+static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
 {
 	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
 	       !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
 	       (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS);
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_ectl);
 
 /**
  * scsw_tm_is_valid_pno - check pno field validity
@@ -519,7 +651,7 @@
  * Return non-zero if the pno field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_pno(union scsw *scsw)
+static inline int scsw_tm_is_valid_pno(union scsw *scsw)
 {
 	return (scsw->tm.fctl != 0) &&
 	       (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
@@ -527,7 +659,6 @@
 		 ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
 		  (scsw->tm.actl & SCSW_ACTL_SUSPENDED)));
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_pno);
 
 /**
  * scsw_tm_is_valid_fctl - check fctl field validity
@@ -536,12 +667,11 @@
  * Return non-zero if the fctl field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_fctl(union scsw *scsw)
+static inline int scsw_tm_is_valid_fctl(union scsw *scsw)
 {
 	/* Only valid if pmcw.dnv == 1*/
 	return 1;
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_fctl);
 
 /**
  * scsw_tm_is_valid_actl - check actl field validity
@@ -550,12 +680,11 @@
  * Return non-zero if the actl field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_actl(union scsw *scsw)
+static inline int scsw_tm_is_valid_actl(union scsw *scsw)
 {
 	/* Only valid if pmcw.dnv == 1*/
 	return 1;
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_actl);
 
 /**
  * scsw_tm_is_valid_stctl - check stctl field validity
@@ -564,12 +693,11 @@
  * Return non-zero if the stctl field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_stctl(union scsw *scsw)
+static inline int scsw_tm_is_valid_stctl(union scsw *scsw)
 {
 	/* Only valid if pmcw.dnv == 1*/
 	return 1;
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_stctl);
 
 /**
  * scsw_tm_is_valid_dstat - check dstat field validity
@@ -578,12 +706,11 @@
  * Return non-zero if the dstat field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_dstat(union scsw *scsw)
+static inline int scsw_tm_is_valid_dstat(union scsw *scsw)
 {
 	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
 	       (scsw->tm.cc != 3);
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_dstat);
 
 /**
  * scsw_tm_is_valid_cstat - check cstat field validity
@@ -592,12 +719,11 @@
  * Return non-zero if the cstat field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_cstat(union scsw *scsw)
+static inline int scsw_tm_is_valid_cstat(union scsw *scsw)
 {
 	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
 	       (scsw->tm.cc != 3);
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_cstat);
 
 /**
  * scsw_tm_is_valid_fcxs - check fcxs field validity
@@ -606,11 +732,10 @@
  * Return non-zero if the fcxs field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_fcxs(union scsw *scsw)
+static inline int scsw_tm_is_valid_fcxs(union scsw *scsw)
 {
 	return 1;
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_fcxs);
 
 /**
  * scsw_tm_is_valid_schxs - check schxs field validity
@@ -619,14 +744,13 @@
  * Return non-zero if the schxs field of the specified transport mode scsw is
  * valid, zero otherwise.
  */
-int scsw_tm_is_valid_schxs(union scsw *scsw)
+static inline int scsw_tm_is_valid_schxs(union scsw *scsw)
 {
 	return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK |
 				  SCHN_STAT_INTF_CTRL_CHK |
 				  SCHN_STAT_PROT_CHECK |
 				  SCHN_STAT_CHN_DATA_CHK));
 }
-EXPORT_SYMBOL(scsw_tm_is_valid_schxs);
 
 /**
  * scsw_is_valid_actl - check actl field validity
@@ -636,14 +760,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_actl(union scsw *scsw)
+static inline int scsw_is_valid_actl(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_actl(scsw);
 	else
 		return scsw_cmd_is_valid_actl(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_actl);
 
 /**
  * scsw_is_valid_cc - check cc field validity
@@ -653,14 +776,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_cc(union scsw *scsw)
+static inline int scsw_is_valid_cc(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_cc(scsw);
 	else
 		return scsw_cmd_is_valid_cc(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_cc);
 
 /**
  * scsw_is_valid_cstat - check cstat field validity
@@ -670,14 +792,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_cstat(union scsw *scsw)
+static inline int scsw_is_valid_cstat(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_cstat(scsw);
 	else
 		return scsw_cmd_is_valid_cstat(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_cstat);
 
 /**
  * scsw_is_valid_dstat - check dstat field validity
@@ -687,14 +808,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_dstat(union scsw *scsw)
+static inline int scsw_is_valid_dstat(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_dstat(scsw);
 	else
 		return scsw_cmd_is_valid_dstat(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_dstat);
 
 /**
  * scsw_is_valid_ectl - check ectl field validity
@@ -704,14 +824,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_ectl(union scsw *scsw)
+static inline int scsw_is_valid_ectl(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_ectl(scsw);
 	else
 		return scsw_cmd_is_valid_ectl(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_ectl);
 
 /**
  * scsw_is_valid_eswf - check eswf field validity
@@ -721,14 +840,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_eswf(union scsw *scsw)
+static inline int scsw_is_valid_eswf(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_eswf(scsw);
 	else
 		return scsw_cmd_is_valid_eswf(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_eswf);
 
 /**
  * scsw_is_valid_fctl - check fctl field validity
@@ -738,14 +856,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_fctl(union scsw *scsw)
+static inline int scsw_is_valid_fctl(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_fctl(scsw);
 	else
 		return scsw_cmd_is_valid_fctl(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_fctl);
 
 /**
  * scsw_is_valid_key - check key field validity
@@ -755,14 +872,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_key(union scsw *scsw)
+static inline int scsw_is_valid_key(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_key(scsw);
 	else
 		return scsw_cmd_is_valid_key(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_key);
 
 /**
  * scsw_is_valid_pno - check pno field validity
@@ -772,14 +888,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_pno(union scsw *scsw)
+static inline int scsw_is_valid_pno(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_pno(scsw);
 	else
 		return scsw_cmd_is_valid_pno(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_pno);
 
 /**
  * scsw_is_valid_stctl - check stctl field validity
@@ -789,14 +904,13 @@
  * regardless of whether it is a transport mode or command mode scsw.
  * Return zero if the field does not contain a valid value.
  */
-int scsw_is_valid_stctl(union scsw *scsw)
+static inline int scsw_is_valid_stctl(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_valid_stctl(scsw);
 	else
 		return scsw_cmd_is_valid_stctl(scsw);
 }
-EXPORT_SYMBOL(scsw_is_valid_stctl);
 
 /**
  * scsw_cmd_is_solicited - check for solicited scsw
@@ -805,12 +919,11 @@
  * Return non-zero if the command mode scsw indicates that the associated
  * status condition is solicited, zero if it is unsolicited.
  */
-int scsw_cmd_is_solicited(union scsw *scsw)
+static inline int scsw_cmd_is_solicited(union scsw *scsw)
 {
 	return (scsw->cmd.cc != 0) || (scsw->cmd.stctl !=
 		(SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
 }
-EXPORT_SYMBOL(scsw_cmd_is_solicited);
 
 /**
  * scsw_tm_is_solicited - check for solicited scsw
@@ -819,12 +932,11 @@
  * Return non-zero if the transport mode scsw indicates that the associated
  * status condition is solicited, zero if it is unsolicited.
  */
-int scsw_tm_is_solicited(union scsw *scsw)
+static inline int scsw_tm_is_solicited(union scsw *scsw)
 {
 	return (scsw->tm.cc != 0) || (scsw->tm.stctl !=
 		(SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
 }
-EXPORT_SYMBOL(scsw_tm_is_solicited);
 
 /**
  * scsw_is_solicited - check for solicited scsw
@@ -833,11 +945,12 @@
  * Return non-zero if the transport or command mode scsw indicates that the
  * associated status condition is solicited, zero if it is unsolicited.
  */
-int scsw_is_solicited(union scsw *scsw)
+static inline int scsw_is_solicited(union scsw *scsw)
 {
 	if (scsw_is_tm(scsw))
 		return scsw_tm_is_solicited(scsw);
 	else
 		return scsw_cmd_is_solicited(scsw);
 }
-EXPORT_SYMBOL(scsw_is_solicited);
+
+#endif /* _ASM_S390_SCSW_H_ */
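
Since all of the scsw accessors above are now static inlines in the header,
any code that sees a subchannel status word can use them without module
exports. A minimal sketch of how an interrupt handler might classify a
status word; the function name and its caller are illustrative, not part of
this patch, and only accessors and constants defined above are used:

	/* Sketch: classify a subchannel status word after an I/O interrupt.
	 * Returns 1 on normal completion, -EIO on a subchannel check
	 * condition, 0 if there is nothing to do here.
	 */
	static int sketch_classify_scsw(union scsw *scsw)
	{
		if (!scsw_is_solicited(scsw))
			return 0;	/* unsolicited status - ignore here */
		if (scsw_is_valid_dstat(scsw) &&
		    (scsw_dstat(scsw) & (DEV_STAT_CHN_END | DEV_STAT_DEV_END)))
			return 1;	/* channel program completed */
		if (scsw_is_valid_cstat(scsw) && scsw_cstat(scsw))
			return -EIO;	/* e.g. SCHN_STAT_PROG_CHECK was set */
		return 0;
	}
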
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 38b0fc2..e37478e 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -8,7 +8,7 @@
 #ifndef _ASM_S390_SETUP_H
 #define _ASM_S390_SETUP_H
 
-#define COMMAND_LINE_SIZE	1024
+#define COMMAND_LINE_SIZE	4096
 
 #define ARCH_COMMAND_LINE_SIZE	896
 
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 72137bc..c991fe6 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -51,32 +51,7 @@
 #define PROC_CHANGE_PENALTY	20		/* Schedule penalty */
 
 #define raw_smp_processor_id()	(S390_lowcore.cpu_nr)
-
-/*
- * returns 1 if cpu is in stopped/check stopped state or not operational
- * returns 0 otherwise
- */
-static inline int
-smp_cpu_not_running(int cpu)
-{
-	__u32 status;
-
-	switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) {
-	case sigp_order_code_accepted:
-	case sigp_status_stored:
-		/* Check for stopped and check stop state */
-		if (status & 0x50)
-			return 1;
-		break;
-	case sigp_not_operational:
-		return 1;
-	default:
-		break;
-	}
-	return 0;
-}
-
-#define cpu_logical_map(cpu) (cpu)
+#define cpu_logical_map(cpu)	(cpu)
 
 extern int __cpu_disable (void);
 extern void __cpu_die (unsigned int cpu);
@@ -91,11 +66,6 @@
 
 #endif
 
-#ifndef CONFIG_SMP
-#define hard_smp_processor_id()		0
-#define smp_cpu_not_running(cpu)	1
-#endif
-
 #ifdef CONFIG_HOTPLUG_CPU
 extern int smp_rescan_cpus(void);
 #else
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 4fb83c1..379661d 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -109,11 +109,7 @@
 #define pfault_fini()		do { } while (0)
 #endif /* CONFIG_PFAULT */
 
-#ifdef CONFIG_PAGE_STATES
 extern void cmma_init(void);
-#else
-static inline void cmma_init(void) { }
-#endif
 
 #define finish_arch_switch(prev) do {					     \
 	set_fs(current->thread.mm_segment);				     \
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index cc21e3e..24aa1cd 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -90,4 +90,18 @@
 
 extern u64 sched_clock_base_cc;
 
+/**
+ * get_clock_monotonic - returns current time in clock rate units
+ *
+ * The clock and sched_clock_base_cc get changed via stop_machine,
+ * so the caller must ensure that preemption is disabled when
+ * calling this
+ * function, otherwise the returned value is not guaranteed to
+ * be monotonic.
+ */
+static inline unsigned long long get_clock_monotonic(void)
+{
+	return get_clock_xt() - sched_clock_base_cc;
+}
+
 #endif
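
Note that get_clock_monotonic() only yields a stable value while preemption
is off, since sched_clock_base_cc can be rewritten under stop_machine. A
minimal usage sketch (the wrapper name is illustrative):

	/* Sketch: take one monotonic TOD snapshot with preemption disabled,
	 * so the clock read and sched_clock_base_cc belong to the same epoch.
	 */
	static unsigned long long snapshot_tod(void)
	{
		unsigned long long clk;

		preempt_disable();
		clk = get_clock_monotonic();
		preempt_enable();
		return clk;
	}
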
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index c75ed43..c7be8e1 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -32,7 +32,7 @@
 
 obj-$(CONFIG_MODULES)		+= s390_ksyms.o module.o
 obj-$(CONFIG_SMP)		+= smp.o topology.o
-
+obj-$(CONFIG_HIBERNATION)	+= suspend.o swsusp_asm64.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
 obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o \
@@ -41,7 +41,7 @@
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o
+obj-$(CONFIG_FUNCTION_TRACER)	+= $(if $(CONFIG_64BIT),mcount64.o,mcount.o)
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index cae14c4..bf8b4ae 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -6,6 +6,9 @@
  *		 Heiko Carstens <heiko.carstens@de.ibm.com>
  */
 
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/errno.h>
@@ -16,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
+#include <linux/kernel.h>
 #include <asm/ebcdic.h>
 #include <asm/ipl.h>
 #include <asm/lowcore.h>
@@ -35,8 +39,6 @@
 
 char kernel_nss_name[NSS_NAME_SIZE + 1];
 
-static unsigned long machine_flags;
-
 static void __init setup_boot_command_line(void);
 
 /*
@@ -81,6 +83,8 @@
 	"	br	14\n"
 	"	.size	savesys_ipl_nss, .-savesys_ipl_nss\n");
 
+static __initdata char upper_command_line[COMMAND_LINE_SIZE];
+
 static noinline __init void create_kernel_nss(void)
 {
 	unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size;
@@ -90,7 +94,6 @@
 	int response;
 	size_t len;
 	char *savesys_ptr;
-	char upper_command_line[COMMAND_LINE_SIZE];
 	char defsys_cmd[DEFSYS_CMD_SIZE];
 	char savesys_cmd[SAVESYS_CMD_SIZE];
 
@@ -141,6 +144,8 @@
 	__cpcmd(defsys_cmd, NULL, 0, &response);
 
 	if (response != 0) {
+		pr_err("Defining the Linux kernel NSS failed with rc=%d\n",
+			response);
 		kernel_nss_name[0] = '\0';
 		return;
 	}
@@ -153,8 +158,11 @@
 	 *	       max SAVESYS_CMD_SIZE
 	 * On error: response contains the numeric portion of cp error message.
 	 *	     for SAVESYS it will be >= 263
+	 *	     for missing privilege class, it will be 1
 	 */
-	if (response > SAVESYS_CMD_SIZE) {
+	if (response > SAVESYS_CMD_SIZE || response == 1) {
+		pr_err("Saving the Linux kernel NSS failed with rc=%d\n",
+			response);
 		kernel_nss_name[0] = '\0';
 		return;
 	}
@@ -205,12 +213,9 @@
 
 	/* Running under KVM? If not we assume z/VM */
 	if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3))
-		machine_flags |= MACHINE_FLAG_KVM;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
 	else
-		machine_flags |= MACHINE_FLAG_VM;
-
-	/* Store machine flags for setting up lowcore early */
-	S390_lowcore.machine_flags = machine_flags;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
 }
 
 static __init void early_pgm_check_handler(void)
@@ -245,7 +250,7 @@
 	facilities = stfl();
 	if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29)))
 		return;
-	machine_flags |= MACHINE_FLAG_HPAGE;
+	S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE;
 	__ctl_set_bit(0, 23);
 #endif
 }
@@ -263,7 +268,7 @@
 		EX_TABLE(0b,1b)
 		: "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0");
 	if (!rc)
-		machine_flags |= MACHINE_FLAG_MVPG;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG;
 #endif
 }
 
@@ -279,7 +284,7 @@
 		EX_TABLE(0b,1b)
 		: "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc");
 	if (!rc)
-		machine_flags |= MACHINE_FLAG_IEEE;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE;
 #endif
 }
 
@@ -298,7 +303,7 @@
 		EX_TABLE(0b,1b)
 		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2");
 	if (!rc)
-		machine_flags |= MACHINE_FLAG_CSP;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_CSP;
 #endif
 }
 
@@ -315,7 +320,7 @@
 		EX_TABLE(0b,1b)
 		: "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
 	if (!rc)
-		machine_flags |= MACHINE_FLAG_DIAG9C;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
 }
 
 static __init void detect_diag44(void)
@@ -330,7 +335,7 @@
 		EX_TABLE(0b,1b)
 		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
 	if (!rc)
-		machine_flags |= MACHINE_FLAG_DIAG44;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
 #endif
 }
 
@@ -341,11 +346,11 @@
 
 	facilities = stfl();
 	if (facilities & (1 << 28))
-		machine_flags |= MACHINE_FLAG_IDTE;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
 	if (facilities & (1 << 23))
-		machine_flags |= MACHINE_FLAG_PFMF;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF;
 	if (facilities & (1 << 4))
-		machine_flags |= MACHINE_FLAG_MVCOS;
+		S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
 #endif
 }
 
@@ -367,21 +372,35 @@
 }
 
 /* Set up boot command line */
+static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
+{
+	char *parm, *delim;
+	size_t rc, len;
+
+	len = strlen(boot_command_line);
+
+	delim = boot_command_line + len;	/* '\0' character position */
+	parm  = boot_command_line + len + 1;	/* append right after '\0' */
+
+	rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1);
+	if (rc) {
+		if (*parm == '=')
+			memmove(boot_command_line, parm + 1, rc);
+		else
+			*delim = ' ';		/* replace '\0' with space */
+	}
+}
+
 static void __init setup_boot_command_line(void)
 {
-	char *parm = NULL;
-
 	/* copy arch command line */
 	strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
 
 	/* append IPL PARM data to the boot command line */
-	if (MACHINE_IS_VM) {
-		parm = boot_command_line + strlen(boot_command_line);
-		*parm++ = ' ';
-		get_ipl_vmparm(parm);
-		if (parm[0] == '=')
-			memmove(boot_command_line, parm + 1, strlen(parm));
-	}
+	if (MACHINE_IS_VM)
+		append_to_cmdline(append_ipl_vmparm);
+
+	append_to_cmdline(append_ipl_scpdata);
 }
 
 
@@ -413,7 +432,6 @@
 	setup_hpage();
 	sclp_facilities_detect();
 	detect_memory_layout(memory_chunk);
-	S390_lowcore.machine_flags = machine_flags;
 #ifdef CONFIG_DYNAMIC_FTRACE
 	S390_lowcore.ftrace_func = (unsigned long)ftrace_caller;
 #endif
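
The trick in append_to_cmdline() above is that the IPL callback writes its
data one byte past the current terminator; the data is only spliced into the
command line if the callback actually returned something, either by turning
the old '\0' into a space or, for a "=..." parameter, by replacing the whole
line. A self-contained sketch of that splice, with a hypothetical callback
standing in for append_ipl_vmparm()/append_ipl_scpdata():

	#include <stdio.h>
	#include <string.h>

	static char cmdline[64] = "root=/dev/dasda1";

	/* hypothetical stand-in for the IPL data callbacks */
	static size_t fake_ipl_data(char *dest, size_t size)
	{
		return snprintf(dest, size, "dasd=0150");
	}

	int main(void)
	{
		size_t len = strlen(cmdline);
		char *delim = cmdline + len;	/* '\0' position */
		char *parm = cmdline + len + 1;	/* right after the '\0' */
		size_t rc = fake_ipl_data(parm, sizeof(cmdline) - len - 1);

		if (rc) {
			if (*parm == '=')	/* "=..." replaces the line */
				memmove(cmdline, parm + 1, rc);
			else
				*delim = ' ';	/* join the two strings */
		}
		printf("%s\n", cmdline);  /* root=/dev/dasda1 dasd=0150 */
		return 0;
	}
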
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index c4c80a2..f78580a 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -278,7 +278,8 @@
 	bnz	BASED(sysc_work)  # there is work to do (signals etc.)
 sysc_restore:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	la	%r1,BASED(sysc_restore_trace_psw)
+	la	%r1,BASED(sysc_restore_trace_psw_addr)
+	l	%r1,0(%r1)
 	lpsw	0(%r1)
 sysc_restore_trace:
 	TRACE_IRQS_CHECK
@@ -289,10 +290,15 @@
 sysc_done:
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+sysc_restore_trace_psw_addr:
+	.long sysc_restore_trace_psw
+
+	.section .data,"aw",@progbits
 	.align	8
 	.globl	sysc_restore_trace_psw
 sysc_restore_trace_psw:
 	.long	0, sysc_restore_trace + 0x80000000
+	.previous
 #endif
 
 #
@@ -606,7 +612,8 @@
 	bnz	BASED(io_work)		# there is work to do (signals etc.)
 io_restore:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	la	%r1,BASED(io_restore_trace_psw)
+	la	%r1,BASED(io_restore_trace_psw_addr)
+	l	%r1,0(%r1)
 	lpsw	0(%r1)
 io_restore_trace:
 	TRACE_IRQS_CHECK
@@ -617,10 +624,15 @@
 io_done:
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+io_restore_trace_psw_addr:
+	.long io_restore_trace_psw
+
+	.section .data,"aw",@progbits
 	.align	8
 	.globl	io_restore_trace_psw
 io_restore_trace_psw:
 	.long	0, io_restore_trace + 0x80000000
+	.previous
 #endif
 
 #
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index f6618e9..009ca61 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -284,10 +284,12 @@
 sysc_done:
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+	.section .data,"aw",@progbits
 	.align	8
 	.globl sysc_restore_trace_psw
 sysc_restore_trace_psw:
 	.quad	0, sysc_restore_trace
+	.previous
 #endif
 
 #
@@ -595,10 +597,12 @@
 io_done:
 
 #ifdef CONFIG_TRACE_IRQFLAGS
+	.section .data,"aw",@progbits
 	.align	8
 	.globl io_restore_trace_psw
 io_restore_trace_psw:
 	.quad	0, io_restore_trace
+	.previous
 #endif
 
 #
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index ec68823..c52b4f7 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -27,6 +27,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_64BIT
 #define ARCH_OFFSET	4
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index 2ced846..602b508 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -24,6 +24,7 @@
 # Setup stack
 #
 	l	%r15,.Linittu-.LPG1(%r13)
+	st	%r15,__LC_THREAD_INFO	# cache thread info in lowcore
 	mvc	__LC_CURRENT(4),__TI_task(%r15)
 	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE
 	st	%r15,__LC_KERNEL_STACK	# set end of kernel stack
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 65667b2..6a25080 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -62,9 +62,9 @@
 	clr	%r11,%r12
 	je	5f			# no more space in prefix array
 4:
-	ahi	%r8,1				# next cpu (r8 += 1)
-	cl	%r8,.Llast_cpu-.LPG1(%r13)	# is last possible cpu ?
-	jl	1b				# jump if not last cpu
+	ahi	%r8,1			# next cpu (r8 += 1)
+	chi	%r8,MAX_CPU_ADDRESS	# is last possible cpu ?
+	jle	1b			# jump if not last cpu
 5:
 	lhi	%r1,2			# mode 2 = esame (dump)
 	j	6f
@@ -92,6 +92,7 @@
 # Setup stack
 #
 	larl	%r15,init_thread_union
+	stg	%r15,__LC_THREAD_INFO	# cache thread info in lowcore
 	lg	%r14,__TI_task(%r15)	# cache current in lowcore
 	stg	%r14,__LC_CURRENT
 	aghi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
@@ -129,8 +130,6 @@
 #ifdef CONFIG_ZFCPDUMP
 .Lcurrent_cpu:
 	.long 0x0
-.Llast_cpu:
-	.long 0x0000ffff
 .Lpref_arr_ptr:
 	.long zfcpdump_prefix_array
 #endif /* CONFIG_ZFCPDUMP */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 371a2d8..ee57a42 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -272,17 +272,18 @@
 static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
 
 /* VM IPL PARM routines */
-static void reipl_get_ascii_vmparm(char *dest,
+size_t reipl_get_ascii_vmparm(char *dest, size_t size,
 				   const struct ipl_parameter_block *ipb)
 {
 	int i;
-	int len = 0;
+	size_t len;
 	char has_lowercase = 0;
 
+	len = 0;
 	if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
 	    (ipb->ipl_info.ccw.vm_parm_len > 0)) {
 
-		len = ipb->ipl_info.ccw.vm_parm_len;
+		len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
 		memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
 		/* If at least one character is lowercase, we assume mixed
 		 * case; otherwise we convert everything to lowercase.
@@ -299,14 +300,20 @@
 		EBCASC(dest, len);
 	}
 	dest[len] = 0;
+
+	return len;
 }
 
-void get_ipl_vmparm(char *dest)
+size_t append_ipl_vmparm(char *dest, size_t size)
 {
+	size_t rc;
+
+	rc = 0;
 	if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
-		reipl_get_ascii_vmparm(dest, &ipl_block);
+		rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
 	else
 		dest[0] = 0;
+	return rc;
 }
 
 static ssize_t ipl_vm_parm_show(struct kobject *kobj,
@@ -314,10 +321,65 @@
 {
 	char parm[DIAG308_VMPARM_SIZE + 1] = {};
 
-	get_ipl_vmparm(parm);
+	append_ipl_vmparm(parm, sizeof(parm));
 	return sprintf(page, "%s\n", parm);
 }
 
+static size_t scpdata_length(const char *buf, size_t count)
+{
+	while (count) {
+		if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
+			break;
+		count--;
+	}
+	return count;
+}
+
+size_t reipl_append_ascii_scpdata(char *dest, size_t size,
+				  const struct ipl_parameter_block *ipb)
+{
+	size_t count;
+	size_t i;
+	int has_lowercase;
+
+	count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
+					     ipb->ipl_info.fcp.scp_data_len));
+	if (!count)
+		goto out;
+
+	has_lowercase = 0;
+	for (i = 0; i < count; i++) {
+		if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
+			count = 0;
+			goto out;
+		}
+		if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
+			has_lowercase = 1;
+	}
+
+	if (has_lowercase)
+		memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
+	else
+		for (i = 0; i < count; i++)
+			dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
+out:
+	dest[count] = '\0';
+	return count;
+}
+
+size_t append_ipl_scpdata(char *dest, size_t len)
+{
+	size_t rc;
+
+	rc = 0;
+	if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
+		rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
+	else
+		dest[0] = 0;
+	return rc;
+}
+
 static struct kobj_attribute sys_ipl_vm_parm_attr =
 	__ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
 
@@ -553,7 +615,7 @@
 {
 	char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
 
-	reipl_get_ascii_vmparm(vmparm, ipb);
+	reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
 	return sprintf(page, "%s\n", vmparm);
 }
 
@@ -626,6 +688,59 @@
 
 /* FCP reipl device attributes */
 
+static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj,
+				      struct bin_attribute *attr,
+				      char *buf, loff_t off, size_t count)
+{
+	size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len;
+	void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_fcp_scpdata_write(struct kobject *kobj,
+				       struct bin_attribute *attr,
+				       char *buf, loff_t off, size_t count)
+{
+	size_t padding;
+	size_t scpdata_len;
+
+	if (off < 0)
+		return -EINVAL;
+
+	if (off >= DIAG308_SCPDATA_SIZE)
+		return -ENOSPC;
+
+	if (count > DIAG308_SCPDATA_SIZE - off)
+		count = DIAG308_SCPDATA_SIZE - off;
+
+	memcpy(reipl_block_fcp->ipl_info.fcp.scp_data + off, buf, count);
+	scpdata_len = off + count;
+
+	if (scpdata_len % 8) {
+		padding = 8 - (scpdata_len % 8);
+		memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
+		       0, padding);
+		scpdata_len += padding;
+	}
+
+	reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
+	reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
+	reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
+
+	return count;
+}
+
+static struct bin_attribute sys_reipl_fcp_scp_data_attr = {
+	.attr = {
+		.name = "scp_data",
+		.mode = S_IRUGO | S_IWUSR,
+	},
+	.size = PAGE_SIZE,
+	.read = reipl_fcp_scpdata_read,
+	.write = reipl_fcp_scpdata_write,
+};
+
 DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
 		   reipl_block_fcp->ipl_info.fcp.wwpn);
 DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n",
@@ -647,7 +762,6 @@
 };
 
 static struct attribute_group reipl_fcp_attr_group = {
-	.name  = IPL_FCP_STR,
 	.attrs = reipl_fcp_attrs,
 };
 
@@ -895,6 +1009,7 @@
 	__ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
 
 static struct kset *reipl_kset;
+static struct kset *reipl_fcp_kset;
 
 static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
 			   const enum ipl_method m)
@@ -906,7 +1021,7 @@
 
 	reipl_get_ascii_loadparm(loadparm, ipb);
 	reipl_get_ascii_nss_name(nss_name, ipb);
-	reipl_get_ascii_vmparm(vmparm, ipb);
+	reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
 
 	switch (m) {
 	case REIPL_METHOD_CCW_VM:
@@ -1076,23 +1191,44 @@
 	int rc;
 
 	if (!diag308_set_works) {
-		if (ipl_info.type == IPL_TYPE_FCP)
+		if (ipl_info.type == IPL_TYPE_FCP) {
 			make_attrs_ro(reipl_fcp_attrs);
-		else
+			sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
+		} else
 			return 0;
 	}
 
 	reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!reipl_block_fcp)
 		return -ENOMEM;
-	rc = sysfs_create_group(&reipl_kset->kobj, &reipl_fcp_attr_group);
+
+	/* sysfs: create fcp kset for mixing attr group and bin attrs */
+	reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
+					     &reipl_kset->kobj);
+	if (!reipl_fcp_kset) {
+		free_page((unsigned long) reipl_block_fcp);
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
 	if (rc) {
-		free_page((unsigned long)reipl_block_fcp);
+		kset_unregister(reipl_fcp_kset);
+		free_page((unsigned long) reipl_block_fcp);
 		return rc;
 	}
-	if (ipl_info.type == IPL_TYPE_FCP) {
+
+	rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj,
+				   &sys_reipl_fcp_scp_data_attr);
+	if (rc) {
+		sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+		kset_unregister(reipl_fcp_kset);
+		free_page((unsigned long) reipl_block_fcp);
+		return rc;
+	}
+
+	if (ipl_info.type == IPL_TYPE_FCP)
 		memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
-	} else {
+	else {
 		reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
 		reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
 		reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
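
From user space the new bin attribute takes raw SCP data; the kernel side
pads the stored copy to a multiple of 8 bytes and folds the padded length
into hdr.len and hdr.blk0_len. A hedged sketch of feeding it, assuming the
reipl kset lives at /sys/firmware/reipl so the file ends up at
/sys/firmware/reipl/fcp/scp_data:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *data = "PORTNAME(OSA1)";	/* hypothetical payload */
		int fd = open("/sys/firmware/reipl/fcp/scp_data", O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* 14 bytes written; the kernel stores 16 (padded to an
		 * 8-byte multiple) and adjusts the parameter block lengths. */
		if (write(fd, data, strlen(data)) < 0)
			perror("write");
		close(fd);
		return 0;
	}
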
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 2a0a5e9..dfe015d 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -11,111 +11,27 @@
 ftrace_stub:
 	br	%r14
 
-#ifdef CONFIG_64BIT
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
 	.globl _mcount
 _mcount:
-	br	%r14
-
-	.globl ftrace_caller
-ftrace_caller:
-	larl	%r1,function_trace_stop
-	icm	%r1,0xf,0(%r1)
-	bnzr	%r14
-	stmg	%r2,%r5,32(%r15)
-	stg	%r14,112(%r15)
-	lgr	%r1,%r15
-	aghi	%r15,-160
-	stg	%r1,__SF_BACKCHAIN(%r15)
-	lgr	%r2,%r14
-	lg	%r3,168(%r15)
-	larl	%r14,ftrace_dyn_func
-	lg	%r14,0(%r14)
-	basr	%r14,%r14
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	.globl	ftrace_graph_caller
-ftrace_graph_caller:
-	# This unconditional branch gets runtime patched. Change only if
-	# you know what you are doing. See ftrace_enable_graph_caller().
-	j	0f
-	lg	%r2,272(%r15)
-	lg	%r3,168(%r15)
-	brasl	%r14,prepare_ftrace_return
-	stg	%r2,168(%r15)
-0:
-#endif
-	aghi	%r15,160
-	lmg	%r2,%r5,32(%r15)
-	lg	%r14,112(%r15)
+#ifdef CONFIG_DYNAMIC_FTRACE
 	br	%r14
 
 	.data
 	.globl	ftrace_dyn_func
 ftrace_dyn_func:
-	.quad	ftrace_stub
+	.long	ftrace_stub
 	.previous
 
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-	.globl _mcount
-_mcount:
-	larl	%r1,function_trace_stop
-	icm	%r1,0xf,0(%r1)
-	bnzr	%r14
-	stmg	%r2,%r5,32(%r15)
-	stg	%r14,112(%r15)
-	lgr	%r1,%r15
-	aghi	%r15,-160
-	stg	%r1,__SF_BACKCHAIN(%r15)
-	lgr	%r2,%r14
-	lg	%r3,168(%r15)
-	larl	%r14,ftrace_trace_function
-	lg	%r14,0(%r14)
-	basr	%r14,%r14
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	lg	%r2,272(%r15)
-	lg	%r3,168(%r15)
-	brasl	%r14,prepare_ftrace_return
-	stg	%r2,168(%r15)
-#endif
-	aghi	%r15,160
-	lmg	%r2,%r5,32(%r15)
-	lg	%r14,112(%r15)
-	br	%r14
-
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-	.globl	return_to_handler
-return_to_handler:
-	stmg	%r2,%r5,32(%r15)
-	lgr	%r1,%r15
-	aghi	%r15,-160
-	stg	%r1,__SF_BACKCHAIN(%r15)
-	brasl	%r14,ftrace_return_to_handler
-	aghi	%r15,160
-	lgr	%r14,%r2
-	lmg	%r2,%r5,32(%r15)
-	br	%r14
-
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#else /* CONFIG_64BIT */
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-	.globl _mcount
-_mcount:
-	br	%r14
-
 	.globl ftrace_caller
 ftrace_caller:
+#endif
 	stm	%r2,%r5,16(%r15)
 	bras	%r1,2f
+#ifdef CONFIG_DYNAMIC_FTRACE
+0:	.long	ftrace_dyn_func
+#else
 0:	.long	ftrace_trace_function
+#endif
 1:	.long	function_trace_stop
 2:	l	%r2,1b-0b(%r1)
 	icm	%r2,0xf,0(%r2)
@@ -131,11 +47,13 @@
 	l	%r14,0(%r14)
 	basr	%r14,%r14
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
 	.globl	ftrace_graph_caller
 ftrace_graph_caller:
 	# This unconditional branch gets runtime patched. Change only if
 	# you know what you are doing. See ftrace_enable_graph_caller().
 	j	1f
+#endif
 	bras	%r1,0f
 	.long	prepare_ftrace_return
 0:	l	%r2,152(%r15)
@@ -150,49 +68,6 @@
 3:	lm	%r2,%r5,16(%r15)
 	br	%r14
 
-	.data
-	.globl	ftrace_dyn_func
-ftrace_dyn_func:
-	.long	ftrace_stub
-	.previous
-
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-	.globl _mcount
-_mcount:
-	stm	%r2,%r5,16(%r15)
-	bras	%r1,2f
-0:	.long	ftrace_trace_function
-1:	.long	function_trace_stop
-2:	l	%r2,1b-0b(%r1)
-	icm	%r2,0xf,0(%r2)
-	jnz	3f
-	st	%r14,56(%r15)
-	lr	%r0,%r15
-	ahi	%r15,-96
-	l	%r3,100(%r15)
-	la	%r2,0(%r14)
-	st	%r0,__SF_BACKCHAIN(%r15)
-	la	%r3,0(%r3)
-	l	%r14,0b-0b(%r1)
-	l	%r14,0(%r14)
-	basr	%r14,%r14
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	bras	%r1,0f
-	.long	prepare_ftrace_return
-0:	l	%r2,152(%r15)
-	l	%r4,0(%r1)
-	l	%r3,100(%r15)
-	basr	%r14,%r4
-	st	%r2,100(%r15)
-#endif
-	ahi	%r15,96
-	l	%r14,56(%r15)
-3:	lm	%r2,%r5,16(%r15)
-	br	%r14
-
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 	.globl	return_to_handler
@@ -211,6 +86,4 @@
 	lm	%r2,%r5,16(%r15)
 	br	%r14
 
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#endif /* CONFIG_64BIT */
+#endif
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
new file mode 100644
index 0000000..c37211c
--- /dev/null
+++ b/arch/s390/kernel/mcount64.S
@@ -0,0 +1,78 @@
+/*
+ * Copyright IBM Corp. 2008,2009
+ *
+ *   Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ */
+
+#include <asm/asm-offsets.h>
+
+	.globl ftrace_stub
+ftrace_stub:
+	br	%r14
+
+	.globl _mcount
+_mcount:
+#ifdef CONFIG_DYNAMIC_FTRACE
+	br	%r14
+
+	.data
+	.globl	ftrace_dyn_func
+ftrace_dyn_func:
+	.quad	ftrace_stub
+	.previous
+
+	.globl ftrace_caller
+ftrace_caller:
+#endif
+	larl	%r1,function_trace_stop
+	icm	%r1,0xf,0(%r1)
+	bnzr	%r14
+	stmg	%r2,%r5,32(%r15)
+	stg	%r14,112(%r15)
+	lgr	%r1,%r15
+	aghi	%r15,-160
+	stg	%r1,__SF_BACKCHAIN(%r15)
+	lgr	%r2,%r14
+	lg	%r3,168(%r15)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	larl	%r14,ftrace_dyn_func
+#else
+	larl	%r14,ftrace_trace_function
+#endif
+	lg	%r14,0(%r14)
+	basr	%r14,%r14
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+	.globl	ftrace_graph_caller
+ftrace_graph_caller:
+	# This unconditional branch gets runtime patched. Change only if
+	# you know what you are doing. See ftrace_enable_graph_caller().
+	j	0f
+#endif
+	lg	%r2,272(%r15)
+	lg	%r3,168(%r15)
+	brasl	%r14,prepare_ftrace_return
+	stg	%r2,168(%r15)
+0:
+#endif
+	aghi	%r15,160
+	lmg	%r2,%r5,32(%r15)
+	lg	%r14,112(%r15)
+	br	%r14
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+	.globl	return_to_handler
+return_to_handler:
+	stmg	%r2,%r5,32(%r15)
+	lgr	%r1,%r15
+	aghi	%r15,-160
+	stg	%r1,__SF_BACKCHAIN(%r15)
+	brasl	%r14,ftrace_return_to_handler
+	aghi	%r15,160
+	lgr	%r14,%r2
+	lmg	%r2,%r5,32(%r15)
+	br	%r14
+
+#endif
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index cbb897b..9ed13a1 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -156,15 +156,11 @@
 
 static void __init set_preferred_console(void)
 {
-	if (MACHINE_IS_KVM) {
+	if (MACHINE_IS_KVM)
 		add_preferred_console("hvc", 0, NULL);
-		s390_virtio_console_init();
-		return;
-	}
-
-	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
+	else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
 		add_preferred_console("ttyS", 0, NULL);
-	if (CONSOLE_IS_3270)
+	else if (CONSOLE_IS_3270)
 		add_preferred_console("tty3270", 0, NULL);
 }
 
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 062bd64..6b4fef8 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -536,4 +536,6 @@
 {
 	clear_thread_flag(TIF_NOTIFY_RESUME);
 	tracehook_notify_resume(regs);
+	if (current->replacement_session_keyring)
+		key_replace_session_keyring();
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index be2cae0..56c1687 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -49,6 +49,7 @@
 #include <asm/sclp.h>
 #include <asm/cputime.h>
 #include <asm/vdso.h>
+#include <asm/cpu.h>
 #include "entry.h"
 
 static struct task_struct *current_set[NR_CPUS];
@@ -70,6 +71,23 @@
 
 static void smp_ext_bitcall(int, ec_bit_sig);
 
+static int cpu_stopped(int cpu)
+{
+	__u32 status;
+
+	switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) {
+	case sigp_order_code_accepted:
+	case sigp_status_stored:
+		/* Check for stopped and check stop state */
+		if (status & 0x50)
+			return 1;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
 void smp_send_stop(void)
 {
 	int cpu, rc;
@@ -86,7 +104,7 @@
 			rc = signal_processor(cpu, sigp_stop);
 		} while (rc == sigp_busy);
 
-		while (!smp_cpu_not_running(cpu))
+		while (!cpu_stopped(cpu))
 			cpu_relax();
 	}
 }
@@ -269,19 +287,6 @@
 
 #endif /* CONFIG_ZFCPDUMP */
 
-static int cpu_stopped(int cpu)
-{
-	__u32 status;
-
-	/* Check for stopped state */
-	if (signal_processor_ps(&status, 0, cpu, sigp_sense) ==
-	    sigp_status_stored) {
-		if (status & 0x40)
-			return 1;
-	}
-	return 0;
-}
-
 static int cpu_known(int cpu_id)
 {
 	int cpu;
@@ -300,7 +305,7 @@
 	logical_cpu = cpumask_first(&avail);
 	if (logical_cpu >= nr_cpu_ids)
 		return 0;
-	for (cpu_id = 0; cpu_id <= 65535; cpu_id++) {
+	for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) {
 		if (cpu_known(cpu_id))
 			continue;
 		__cpu_logical_map[logical_cpu] = cpu_id;
@@ -379,7 +384,7 @@
 	/* Use sigp detection algorithm if sclp doesn't work. */
 	if (sclp_get_cpu_info(info)) {
 		smp_use_sigp_detection = 1;
-		for (cpu = 0; cpu <= 65535; cpu++) {
+		for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) {
 			if (cpu == boot_cpu_addr)
 				continue;
 			__cpu_logical_map[CPU_INIT_NO] = cpu;
@@ -635,7 +640,7 @@
 void __cpu_die(unsigned int cpu)
 {
 	/* Wait until target cpu is down */
-	while (!smp_cpu_not_running(cpu))
+	while (!cpu_stopped(cpu))
 		cpu_relax();
 	smp_free_lowcore(cpu);
 	pr_info("Processor %d stopped\n", cpu);
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
new file mode 100644
index 0000000..086bee9
--- /dev/null
+++ b/arch/s390/kernel/suspend.c
@@ -0,0 +1,73 @@
+/*
+ * Suspend support specific for s390.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ */
+
+#include <linux/suspend.h>
+#include <linux/reboot.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/ipl.h>
+
+/*
+ * References to section boundaries
+ */
+extern const void __nosave_begin, __nosave_end;
+
+/*
+ *  check if given pfn is in the 'nosave' or in the read only NSS section
+ */
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
+	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end))
+					>> PAGE_SHIFT;
+	unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
+	unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
+
+	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
+		return 1;
+	if (pfn >= stext_pfn && pfn <= eshared_pfn) {
+		if (ipl_info.type == IPL_TYPE_NSS)
+			return 1;
+	} else if (tprot(pfn * PAGE_SIZE) && pfn > 0)
+		return 1;
+	return 0;
+}
+
+void save_processor_state(void)
+{
+	/* swsusp_arch_suspend() actually saves all cpu register contents.
+	 * Machine checks must be disabled since swsusp_arch_suspend() stores
+	 * register contents to their lowcore save areas. That's the same
+	 * place where register contents on machine checks would be saved.
+	 * To avoid register corruption disable machine checks.
+	 * We must also disable machine checks in the new psw mask for
+	 * program checks, since swsusp_arch_suspend() may generate program
+	 * checks. Disabling machine checks for all other new psw masks is
+	 * just paranoia.
+	 */
+	local_mcck_disable();
+	/* Disable lowcore protection */
+	__ctl_clear_bit(0,28);
+	S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
+}
+
+void restore_processor_state(void)
+{
+	S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
+	/* Enable lowcore protection */
+	__ctl_set_bit(0,28);
+	local_mcck_enable();
+}
diff --git a/arch/s390/power/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
similarity index 99%
rename from arch/s390/power/swsusp_asm64.S
rename to arch/s390/kernel/swsusp_asm64.S
index b26df5c..7cd6b09 100644
--- a/arch/s390/power/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -21,7 +21,7 @@
  * This function runs with disabled interrupts.
  */
 	.section .text
-	.align	2
+	.align	4
 	.globl swsusp_arch_suspend
 swsusp_arch_suspend:
 	stmg	%r6,%r15,__SF_GPRS(%r15)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index d4c8e9c..54e327e 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -60,6 +60,7 @@
 #define TICK_SIZE tick
 
 u64 sched_clock_base_cc = -1;	/* Force to data section. */
+EXPORT_SYMBOL_GPL(sched_clock_base_cc);
 
 static DEFINE_PER_CPU(struct clock_event_device, comparators);
 
@@ -68,7 +69,7 @@
  */
 unsigned long long notrace sched_clock(void)
 {
-	return ((get_clock_xt() - sched_clock_base_cc) * 125) >> 9;
+	return (get_clock_monotonic() * 125) >> 9;
 }
 
 /*
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index a53db23..7315f9e 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -52,55 +52,18 @@
 	. = ALIGN(PAGE_SIZE);
 	_eshared = .;		/* End of shareable data */
 
-	. = ALIGN(16);		/* Exception table */
-	__ex_table : {
-		__start___ex_table = .;
-		*(__ex_table)
-		__stop___ex_table = .;
-	} :data
+	EXCEPTION_TABLE(16) :data
 
-	.data : {		/* Data */
-		DATA_DATA
-		CONSTRUCTORS
-	}
+	RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
 
-	. = ALIGN(PAGE_SIZE);
-	.data_nosave : {
-	__nosave_begin = .;
-		*(.data.nosave)
-	}
-	. = ALIGN(PAGE_SIZE);
-	__nosave_end = .;
-
-	. = ALIGN(PAGE_SIZE);
-	.data.page_aligned : {
-		*(.data.idt)
-	}
-
-	. = ALIGN(0x100);
-	.data.cacheline_aligned : {
-		*(.data.cacheline_aligned)
-	}
-
-	. = ALIGN(0x100);
-	.data.read_mostly : {
-		*(.data.read_mostly)
-	}
 	_edata = .;		/* End of data section */
 
-	. = ALIGN(THREAD_SIZE);	/* init_task */
-	.data.init_task : {
-		*(.data.init_task)
-	}
-
 	/* will be freed after init */
 	. = ALIGN(PAGE_SIZE);	/* Init code and data */
 	__init_begin = .;
-	.init.text : {
-		_sinittext = .;
-		INIT_TEXT
-		_einittext = .;
-	}
+
+	INIT_TEXT_SECTION(PAGE_SIZE)
+
 	/*
 	 * .exit.text is discarded at runtime, not link time,
 	 * to deal with references from __bug_table
@@ -111,49 +74,13 @@
 
 	/* early.c uses stsi, which requires page aligned data. */
 	. = ALIGN(PAGE_SIZE);
-	.init.data : {
-		INIT_DATA
-	}
-	. = ALIGN(0x100);
-	.init.setup : {
-		__setup_start = .;
-		*(.init.setup)
-		__setup_end = .;
-	}
-	.initcall.init : {
-		__initcall_start = .;
-		INITCALLS
-		__initcall_end = .;
-	}
-
-	.con_initcall.init : {
-		__con_initcall_start = .;
-		*(.con_initcall.init)
-		__con_initcall_end = .;
-	}
-	SECURITY_INIT
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	. = ALIGN(0x100);
-	.init.ramfs : {
-		__initramfs_start = .;
-		*(.init.ramfs)
-		. = ALIGN(2);
-		__initramfs_end = .;
-	}
-#endif
+	INIT_DATA_SECTION(0x100)
 
 	PERCPU(PAGE_SIZE)
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;		/* freed after init ends here */
 
-	/* BSS */
-	.bss : {
-		__bss_start = .;
-		*(.bss)
-		. = ALIGN(2);
-		__bss_stop = .;
-	}
+	BSS_SECTION(0, 2, 0)
 
 	_end = . ;
 
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index db05661..eec0544 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux s390-specific parts of the memory manager.
 #
 
-obj-y	 := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
+obj-y	 := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
+	    page-states.o
 obj-$(CONFIG_CMM) += cmm.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_PAGE_STATES) += page-states.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e5e119f..1abbadd 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -10,6 +10,7 @@
  *    Copyright (C) 1995  Linus Torvalds
  */
 
+#include <linux/perf_counter.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -305,7 +306,7 @@
 	 * interrupts again and then search the VMAs
 	 */
 	local_irq_enable();
-
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 	down_read(&mm->mmap_sem);
 
 	si_code = SEGV_MAPERR;
@@ -363,11 +364,15 @@
 		}
 		BUG();
 	}
-	if (fault & VM_FAULT_MAJOR)
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
+	} else {
 		tsk->min_flt++;
-
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
+	}
         up_read(&mm->mmap_sem);
 	/*
 	 * The instruction that caused the program check will
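
The perf_swcounter_event() calls above hook the s390 fault handler into the
software perf counters: one generic PERF_COUNT_SW_PAGE_FAULTS event per fault,
plus the major or minor variant depending on VM_FAULT_MAJOR. Under the later
perf_event ABI (this tree still uses the perf_counter naming), such counts can
be read from userspace roughly as below; a hedged sketch with error handling
trimmed:

    #include <linux/perf_event.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_PAGE_FAULTS;    /* or ..._MAJ / ..._MIN */

        /* Count page faults of the calling task on any CPU. */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
            return 1;

        /* ... do some work that faults pages in ... */

        read(fd, &count, sizeof(count));
        printf("page faults: %lld\n", count);
        close(fd);
        return 0;
    }
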
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index fc0ad73..f92ec20 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -1,6 +1,4 @@
 /*
- * arch/s390/mm/page-states.c
- *
  * Copyright IBM Corp. 2008
  *
  * Guest page hinting for unused pages.
@@ -17,11 +15,12 @@
 #define ESSA_SET_STABLE		1
 #define ESSA_SET_UNUSED		2
 
-static int cmma_flag;
+static int cmma_flag = 1;
 
 static int __init cmma(char *str)
 {
 	char *parm;
+
 	parm = strstrip(str);
 	if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) {
 		cmma_flag = 1;
@@ -32,7 +31,6 @@
 		return 1;
 	return 0;
 }
-
 __setup("cmma=", cmma);
 
 void __init cmma_init(void)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5656672..c702152 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -78,9 +78,9 @@
 		}
 		page->index = page_to_phys(shadow);
 	}
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&mm->context.list_lock);
 	list_add(&page->lru, &mm->context.crst_list);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&mm->context.list_lock);
 	return (unsigned long *) page_to_phys(page);
 }
 
@@ -89,9 +89,9 @@
 	unsigned long *shadow = get_shadow_table(table);
 	struct page *page = virt_to_page(table);
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&mm->context.list_lock);
 	list_del(&page->lru);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&mm->context.list_lock);
 	if (shadow)
 		free_pages((unsigned long) shadow, ALLOC_ORDER);
 	free_pages((unsigned long) table, ALLOC_ORDER);
@@ -182,7 +182,7 @@
 	unsigned long bits;
 
 	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&mm->context.list_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
 		page = list_first_entry(&mm->context.pgtable_list,
@@ -191,7 +191,7 @@
 			page = NULL;
 	}
 	if (!page) {
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(&mm->context.list_lock);
 		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 		if (!page)
 			return NULL;
@@ -202,7 +202,7 @@
 			clear_table_pgstes(table);
 		else
 			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
-		spin_lock(&mm->page_table_lock);
+		spin_lock(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
 	table = (unsigned long *) page_to_phys(page);
@@ -213,7 +213,7 @@
 	page->flags |= bits;
 	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
 		list_move_tail(&page->lru, &mm->context.pgtable_list);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&mm->context.list_lock);
 	return table;
 }
 
@@ -225,7 +225,7 @@
 	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&mm->context.list_lock);
 	page->flags ^= bits;
 	if (page->flags & FRAG_MASK) {
 		/* Page now has some free pgtable fragments. */
@@ -234,7 +234,7 @@
 	} else
 		/* All fragments of the 4K page have been freed. */
 		list_del(&page->lru);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&mm->context.list_lock);
 	if (page) {
 		pgtable_page_dtor(page);
 		__free_page(page);
@@ -245,7 +245,7 @@
 {
 	struct page *page;
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&mm->context.list_lock);
 	/* Free shadow region and segment tables. */
 	list_for_each_entry(page, &mm->context.crst_list, lru)
 		if (page->index) {
@@ -255,7 +255,7 @@
 	/* "Free" second halves of page tables. */
 	list_for_each_entry(page, &mm->context.pgtable_list, lru)
 		page->flags &= ~SECOND_HALVES;
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&mm->context.list_lock);
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index e4868bf..5f91a38 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -331,6 +331,7 @@
 	unsigned long start, end;
 	int i;
 
+	spin_lock_init(&init_mm.context.list_lock);
 	INIT_LIST_HEAD(&init_mm.context.crst_list);
 	INIT_LIST_HEAD(&init_mm.context.pgtable_list);
 	init_mm.context.noexec = 0;
diff --git a/arch/s390/power/Makefile b/arch/s390/power/Makefile
deleted file mode 100644
index 973bb45..0000000
--- a/arch/s390/power/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-#
-# Makefile for s390 PM support
-#
-
-obj-$(CONFIG_HIBERNATION) += suspend.o
-obj-$(CONFIG_HIBERNATION) += swsusp.o
-obj-$(CONFIG_HIBERNATION) += swsusp_64.o
-obj-$(CONFIG_HIBERNATION) += swsusp_asm64.o
diff --git a/arch/s390/power/suspend.c b/arch/s390/power/suspend.c
deleted file mode 100644
index b3351ec..0000000
--- a/arch/s390/power/suspend.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Suspend support specific for s390.
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- */
-
-#include <linux/mm.h>
-#include <linux/suspend.h>
-#include <linux/reboot.h>
-#include <linux/pfn.h>
-#include <asm/sections.h>
-#include <asm/ipl.h>
-
-/*
- * References to section boundaries
- */
-extern const void __nosave_begin, __nosave_end;
-
-/*
- *  check if given pfn is in the 'nosave' or in the read only NSS section
- */
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end))
-					>> PAGE_SHIFT;
-	unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
-	unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
-
-	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
-		return 1;
-	if (pfn >= stext_pfn && pfn <= eshared_pfn) {
-		if (ipl_info.type == IPL_TYPE_NSS)
-			return 1;
-	} else if ((tprot(pfn * PAGE_SIZE) && pfn > 0))
-		return 1;
-	return 0;
-}
diff --git a/arch/s390/power/swsusp.c b/arch/s390/power/swsusp.c
deleted file mode 100644
index bd1f5c6..0000000
--- a/arch/s390/power/swsusp.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Support for suspend and resume on s390
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- *
- */
-
-#include <asm/system.h>
-
-void save_processor_state(void)
-{
-	/* swsusp_arch_suspend() actually saves all cpu register contents.
-	 * Machine checks must be disabled since swsusp_arch_suspend() stores
-	 * register contents to their lowcore save areas. That's the same
-	 * place where register contents on machine checks would be saved.
-	 * To avoid register corruption disable machine checks.
-	 * We must also disable machine checks in the new psw mask for
-	 * program checks, since swsusp_arch_suspend() may generate program
-	 * checks. Disabling machine checks for all other new psw masks is
-	 * just paranoia.
-	 */
-	local_mcck_disable();
-	/* Disable lowcore protection */
-	__ctl_clear_bit(0,28);
-	S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
-}
-
-void restore_processor_state(void)
-{
-	S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
-	/* Enable lowcore protection */
-	__ctl_set_bit(0,28);
-	local_mcck_enable();
-}
diff --git a/arch/s390/power/swsusp_64.c b/arch/s390/power/swsusp_64.c
deleted file mode 100644
index 9516a51..0000000
--- a/arch/s390/power/swsusp_64.c
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Support for suspend and resume on s390
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- *
- */
-
-#include <asm/system.h>
-#include <linux/interrupt.h>
-
-void do_after_copyback(void)
-{
-	mb();
-}
-
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index b5afbec..04a2188 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -640,5 +640,7 @@
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
 	}
 }
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 0663a0e..9e5c9b1 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -772,5 +772,7 @@
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
 	}
 }
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 181d069..7ce1a10 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -590,6 +590,8 @@
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
 	}
 }
 
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index ec82d76..647afbd 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -613,5 +613,8 @@
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
 	}
 }
+
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 676debf..128111d 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,6 +20,7 @@
 #include <linux/bitops.h>
 #include <linux/ioport.h>
 #include <linux/suspend.h>
+#include <linux/kmemleak.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/iommu.h>
@@ -94,6 +95,11 @@
 	 * code for safe
 	 */
 	p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+	/*
+	 * Kmemleak should not scan this block as it may not be mapped via the
+	 * kernel direct mapping.
+	 */
+	kmemleak_ignore(p);
 	if (!p || __pa(p)+aper_size > 0xffffffff) {
 		printk(KERN_ERR
 			"Cannot allocate aperture memory hole (%p,%uK)\n",
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1a041bc..fa80f60 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -3,6 +3,7 @@
 #include <linux/dmar.h>
 #include <linux/bootmem.h>
 #include <linux/pci.h>
+#include <linux/kmemleak.h>
 
 #include <asm/proto.h>
 #include <asm/dma.h>
@@ -88,6 +89,11 @@
 	size = roundup(dma32_bootmem_size, align);
 	dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
 				 512ULL<<20);
+	/*
+	 * Kmemleak should not scan this block as it may not be mapped via the
+	 * kernel direct mapping.
+	 */
+	kmemleak_ignore(dma32_bootmem_ptr);
 	if (dma32_bootmem_ptr)
 		dma32_bootmem_size = size;
 	else
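
Both kmemleak_ignore() call sites (here and in aperture_64.c above) follow one
rule: memory from the boot allocator that may lie outside the kernel direct
mapping must not be scanned, or kmemleak would dereference unmapped addresses.
kmemleak_ignore() makes the scanner neither scan the block nor report it as a
leak. The annotation pattern in isolation; kernel code that builds only
in-tree, with the helper name invented:

    #include <linux/bootmem.h>
    #include <linux/kmemleak.h>

    /* Any early allocation that may fall outside the direct mapping is
     * handed to kmemleak_ignore() immediately after allocation. */
    static void *alloc_early_buffer(unsigned long size, unsigned long align)
    {
        void *p = __alloc_bootmem_nopanic(size, align, 512ULL << 20);

        if (p)
            kmemleak_ignore(p);    /* never scanned, never reported */
        return p;
    }
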
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4c57875..81e58238 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -869,6 +869,8 @@
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
 	}
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 2c55ed0..528bf95 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -331,6 +331,20 @@
 	kmemcheck_shadow_set(shadow, size);
 }
 
+bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
+{
+	enum kmemcheck_shadow status;
+	void *shadow;
+
+	shadow = kmemcheck_shadow_lookup(addr);
+	if (!shadow)
+		return true;
+
+	status = kmemcheck_shadow_test(shadow, size);
+
+	return status == KMEMCHECK_SHADOW_INITIALIZED;
+}
+
 /* Access may cross page boundary */
 static void kmemcheck_read(struct pt_regs *regs,
 	unsigned long addr, unsigned int size)
diff --git a/block/blk-core.c b/block/blk-core.c
index e3299a7..e695634 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -501,6 +501,7 @@
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
+	q->backing_dev_info.name = "block";
 
 	err = bdi_init(&q->backing_dev_info);
 	if (err) {
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 1e15889..95d3449 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -268,6 +268,7 @@
 	if (!d->blkq)
 		goto err_mempool;
 	blk_queue_make_request(d->blkq, aoeblk_make_request);
+	d->blkq->backing_dev_info.name = "aoe";
 	if (bdi_init(&d->blkq->backing_dev_info))
 		goto err_blkq;
 	spin_lock_irqsave(&d->lock, flags);
diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c
index 86105ef..0ecac7e 100644
--- a/drivers/char/hvc_iucv.c
+++ b/drivers/char/hvc_iucv.c
@@ -1006,7 +1006,7 @@
 	priv->dev->release = (void (*)(struct device *)) kfree;
 	rc = device_register(priv->dev);
 	if (rc) {
-		kfree(priv->dev);
+		put_device(priv->dev);
 		goto out_error_dev;
 	}
 
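The hvc_iucv one-liner above encodes a general driver-core rule: once
device_register() has been called, the struct device's kobject holds a
reference even when registration fails, so the error path must use
put_device() -- which drops the reference and ends up in the ->release
callback, here kfree -- and never a bare kfree(). A sketch of the pattern;
kernel code with identifiers illustrative and details such as naming the
device elided:

    #include <linux/device.h>
    #include <linux/slab.h>

    static int register_child(struct device *parent)
    {
        struct device *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
        int rc;

        if (!dev)
            return -ENOMEM;
        dev->parent = parent;
        dev->release = (void (*)(struct device *)) kfree;
        /* dev_set_name() etc. elided */

        rc = device_register(dev);
        if (rc)
            put_device(dev);    /* not kfree(): the kobject holds a ref */
        return rc;
    }
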
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index afa8813..645237b 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -822,6 +822,7 @@
  * - permits private mappings, "copies" are taken of the source of zeros
  */
 static struct backing_dev_info zero_bdi = {
+	.name		= "char/mem",
 	.capabilities	= BDI_CAP_MAP_COPY,
 };
 
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index aec1931..0b73e4e 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -450,6 +450,12 @@
 		goto out_err;
 	}
 
+	/* Default timeouts */
+	chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+	chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
+	chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+	chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+
 	if (request_locality(chip, 0) != 0) {
 		rc = -ENODEV;
 		goto out_err;
@@ -457,12 +463,6 @@
 
 	vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0));
 
-	/* Default timeouts */
-	chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-	chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
-	chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-	chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-
 	dev_info(dev,
 		 "1.2 TPM (device-id 0x%X, rev-id %d)\n",
 		 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
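
The tpm_tis hunk is purely an ordering fix: request_locality() polls the
hardware and gives up after chip->vendor.timeout_a, so while the defaults were
assigned only after that first use, every probe started with zero timeouts and
failed immediately. The failure mode reduced to a runnable toy, all names
invented:

    #include <stdio.h>
    #include <time.h>

    static long timeout_a;    /* initialised too late in the buggy order */

    /* Toy poll loop in the shape of request_locality(); the "hardware"
     * never becomes ready, so only the wait duration differs. */
    static int wait_ready(void)
    {
        time_t stop = time(NULL) + timeout_a;

        do {
            /* poll the access register here */
        } while (time(NULL) < stop);
        return -1;    /* timed out */
    }

    int main(void)
    {
        /* Wrong order: timeout_a is still 0, so this fails instantly. */
        printf("before init: %d\n", wait_ready());

        timeout_a = 1;    /* defaults assigned before first use, as fixed */
        printf("after init:  %d (after the full timeout)\n", wait_ready());
        return 0;
    }
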
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 8f9509e..55d093a 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -362,6 +362,7 @@
 		 * In either case, must tell the provider to reject.
 		 */
 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
+		cm_id->device->iwcm->reject(cm_id, NULL, 0);
 		break;
 	case IW_CM_STATE_CONN_SENT:
 	case IW_CM_STATE_DESTROYING:
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index de922a0..7522008 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2,6 +2,7 @@
  * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -45,14 +46,21 @@
 MODULE_AUTHOR("Hal Rosenstock");
 MODULE_AUTHOR("Sean Hefty");
 
+int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
+int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
+
+module_param_named(send_queue_size, mad_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
+module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+
 static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
 static u32 ib_mad_client_id = 0;
 
 /* Port list lock */
-static spinlock_t ib_mad_port_list_lock;
-
+static DEFINE_SPINLOCK(ib_mad_port_list_lock);
 
 /* Forward declarations */
 static int method_in_use(struct ib_mad_mgmt_method_table **method,
@@ -1974,7 +1982,7 @@
 	unsigned long delay;
 
 	if (list_empty(&mad_agent_priv->wait_list)) {
-		cancel_delayed_work(&mad_agent_priv->timed_work);
+		__cancel_delayed_work(&mad_agent_priv->timed_work);
 	} else {
 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
 					 struct ib_mad_send_wr_private,
@@ -1983,7 +1991,7 @@
 		if (time_after(mad_agent_priv->timeout,
 			       mad_send_wr->timeout)) {
 			mad_agent_priv->timeout = mad_send_wr->timeout;
-			cancel_delayed_work(&mad_agent_priv->timed_work);
+			__cancel_delayed_work(&mad_agent_priv->timed_work);
 			delay = mad_send_wr->timeout - jiffies;
 			if ((long)delay <= 0)
 				delay = 1;
@@ -2023,7 +2031,7 @@
 
 	/* Reschedule a work item if we have a shorter timeout */
 	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
-		cancel_delayed_work(&mad_agent_priv->timed_work);
+		__cancel_delayed_work(&mad_agent_priv->timed_work);
 		queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
 				   &mad_agent_priv->timed_work, delay);
 	}
@@ -2736,8 +2744,8 @@
 	qp_init_attr.send_cq = qp_info->port_priv->cq;
 	qp_init_attr.recv_cq = qp_info->port_priv->cq;
 	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-	qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
-	qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
+	qp_init_attr.cap.max_send_wr = mad_sendq_size;
+	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
 	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
 	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
 	qp_init_attr.qp_type = qp_type;
@@ -2752,8 +2760,8 @@
 		goto error;
 	}
 	/* Use minimum queue sizes unless the CQ is resized */
-	qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE;
-	qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE;
+	qp_info->send_queue.max_active = mad_sendq_size;
+	qp_info->recv_queue.max_active = mad_recvq_size;
 	return 0;
 
 error:
@@ -2792,7 +2800,7 @@
 	init_mad_qp(port_priv, &port_priv->qp_info[0]);
 	init_mad_qp(port_priv, &port_priv->qp_info[1]);
 
-	cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2;
+	cq_size = (mad_sendq_size + mad_recvq_size) * 2;
 	port_priv->cq = ib_create_cq(port_priv->device,
 				     ib_mad_thread_completion_handler,
 				     NULL, port_priv, cq_size, 0);
@@ -2984,7 +2992,11 @@
 {
 	int ret;
 
-	spin_lock_init(&ib_mad_port_list_lock);
+	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
+	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
+
+	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
+	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
 
 	ib_mad_cache = kmem_cache_create("ib_mad",
 					 sizeof(struct ib_mad_private),
@@ -3021,4 +3033,3 @@
 
 module_init(ib_mad_init_module);
 module_exit(ib_mad_cleanup_module);
-
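
Two small idioms land in mad.c besides the tunable queue sizes: the
module-init clamp, which forces user-supplied values into
[IB_MAD_QP_MIN_SIZE, IB_MAD_QP_MAX_SIZE] with a min()/max() pair, and the
switch from a runtime spin_lock_init() to a compile-time DEFINE_SPINLOCK(),
which removes any chance of the lock being taken before module init runs. The
clamp restated as standalone C, with the kernel's min()/max() macros
simplified:

    #include <assert.h>

    #define IB_MAD_QP_MIN_SIZE 64
    #define IB_MAD_QP_MAX_SIZE 8192

    #define min(a, b) ((a) < (b) ? (a) : (b))
    #define max(a, b) ((a) > (b) ? (a) : (b))

    static int clamp_qsize(int requested)
    {
        int size = requested;

        size = min(size, IB_MAD_QP_MAX_SIZE);    /* cap the top */
        size = max(size, IB_MAD_QP_MIN_SIZE);    /* then raise the floor */
        return size;
    }

    int main(void)
    {
        assert(clamp_qsize(16) == IB_MAD_QP_MIN_SIZE);
        assert(clamp_qsize(512) == 512);
        assert(clamp_qsize(1 << 20) == IB_MAD_QP_MAX_SIZE);
        return 0;
    }
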
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 05ce331..9430ab4 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -2,6 +2,7 @@
  * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -49,6 +50,8 @@
 /* QP and CQ parameters */
 #define IB_MAD_QP_SEND_SIZE	128
 #define IB_MAD_QP_RECV_SIZE	512
+#define IB_MAD_QP_MIN_SIZE	64
+#define IB_MAD_QP_MAX_SIZE	8192
 #define IB_MAD_SEND_REQ_MAX_SG	2
 #define IB_MAD_RECV_REQ_MAX_SG	1
 
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 107f170..8d82ba1 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -106,6 +106,8 @@
 	struct ib_sa_query	*query;
 	int			query_id;
 	u16			pkey_index;
+	u8			leave_state;
+	int			retries;
 };
 
 struct mcast_member {
@@ -350,6 +352,7 @@
 
 	rec = group->rec;
 	rec.join_state = leave_state;
+	group->leave_state = leave_state;
 
 	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
 				       port->port_num, IB_SA_METHOD_DELETE, &rec,
@@ -542,7 +545,11 @@
 {
 	struct mcast_group *group = context;
 
-	mcast_work_handler(&group->work);
+	if (status && group->retries > 0 &&
+	    !send_leave(group, group->leave_state))
+		group->retries--;
+	else
+		mcast_work_handler(&group->work);
 }
 
 static struct mcast_group *acquire_group(struct mcast_port *port,
@@ -565,6 +572,7 @@
 	if (!group)
 		return NULL;
 
+	group->retries = 3;
 	group->port = port;
 	group->rec.mgid = *mgid;
 	group->pkey_index = MCAST_INVALID_PKEY_INDEX;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 1865049..8254371 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -109,10 +109,10 @@
 	.remove = ib_sa_remove_one
 };
 
-static spinlock_t idr_lock;
+static DEFINE_SPINLOCK(idr_lock);
 static DEFINE_IDR(query_idr);
 
-static spinlock_t tid_lock;
+static DEFINE_SPINLOCK(tid_lock);
 static u32 tid;
 
 #define PATH_REC_FIELD(field) \
@@ -1077,9 +1077,6 @@
 {
 	int ret;
 
-	spin_lock_init(&idr_lock);
-	spin_lock_init(&tid_lock);
-
 	get_random_bytes(&tid, sizeof tid);
 
 	ret = ib_register_client(&sa_client);
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 8723675..5855e44 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -52,6 +52,10 @@
 	hop_cnt = smp->hop_cnt;
 
 	/* See section 14.2.2.2, Vol 1 IB spec */
+	/* C14-6 -- valid hop_cnt values are from 0 to 63 */
+	if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
+		return IB_SMI_DISCARD;
+
 	if (!ib_get_smp_direction(smp)) {
 		/* C14-9:1 */
 		if (hop_cnt && hop_ptr == 0) {
@@ -133,6 +137,10 @@
 	hop_cnt = smp->hop_cnt;
 
 	/* See section 14.2.2.2, Vol 1 IB spec */
+	/* C14-6 -- valid hop_cnt values are from 0 to 63 */
+	if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
+		return IB_SMI_DISCARD;
+
 	if (!ib_get_smp_direction(smp)) {
 		/* C14-9:1 -- sender should have incremented hop_ptr */
 		if (hop_cnt && hop_ptr == 0)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index eb36a81..d3fff9e 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -73,7 +73,7 @@
 DEFINE_IDR(ib_uverbs_qp_idr);
 DEFINE_IDR(ib_uverbs_srq_idr);
 
-static spinlock_t map_lock;
+static DEFINE_SPINLOCK(map_lock);
 static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
 
@@ -584,14 +584,16 @@
 
 	if (hdr.command < 0				||
 	    hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
-	    !uverbs_cmd_table[hdr.command]		||
-	    !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
+	    !uverbs_cmd_table[hdr.command])
 		return -EINVAL;
 
 	if (!file->ucontext &&
 	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
 		return -EINVAL;
 
+	if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
+		return -ENOSYS;
+
 	return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
 					     hdr.in_words * 4, hdr.out_words * 4);
 }
@@ -836,8 +838,6 @@
 {
 	int ret;
 
-	spin_lock_init(&map_lock);
-
 	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
 				     "infiniband_verbs");
 	if (ret) {
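
The uverbs change above splits one check into two distinct errors: a command
number that is out of range or has no handler at all is malformed input
(-EINVAL), while a well-formed command that this particular device does not
implement is an unsupported operation (-ENOSYS), which lets userspace probe
for optional verbs. The dispatch logic in miniature, with a hypothetical
command table:

    #include <errno.h>
    #include <stdio.h>

    #define NCMDS 4

    typedef int (*handler_t)(void);

    static int get_context(void) { return 0; }
    static int optional_verb(void) { return 0; }    /* invented example */

    static handler_t cmd_table[NCMDS] = { get_context, optional_verb };
    static unsigned long long cmd_mask = 1ULL << 0;    /* device: cmd 0 only */

    static int dispatch(int cmd)
    {
        if (cmd < 0 || cmd >= NCMDS || !cmd_table[cmd])
            return -EINVAL;        /* malformed request */
        if (!(cmd_mask & (1ULL << cmd)))
            return -ENOSYS;        /* known verb, unsupported on this device */
        return cmd_table[cmd]();
    }

    int main(void)
    {
        /* prints -EINVAL (-22), -ENOSYS (-38), 0 */
        printf("%d %d %d\n", dispatch(7), dispatch(1), dispatch(0));
        return 0;
    }
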
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 0cfbb6d..8250740 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -86,11 +86,7 @@
 
 static void c2_print_macaddr(struct net_device *netdev)
 {
-	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
-		"IRQ %u\n", netdev->name,
-		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
-		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
-		netdev->irq);
+	pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq);
 }
 
 static void c2_set_rxbufsize(struct c2_port *c2_port)
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index f1948fa..ad723bd 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -780,11 +780,11 @@
 	/* Register pseudo network device */
 	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
 	if (!dev->pseudo_netdev)
-		goto out3;
+		goto out;
 
 	ret = register_netdev(dev->pseudo_netdev);
 	if (ret)
-		goto out2;
+		goto out_free_netdev;
 
 	pr_debug("%s:%u\n", __func__, __LINE__);
 	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
@@ -851,6 +851,10 @@
 	dev->ibdev.post_recv = c2_post_receive;
 
 	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+	if (dev->ibdev.iwcm == NULL) {
+		ret = -ENOMEM;
+		goto out_unregister_netdev;
+	}
 	dev->ibdev.iwcm->add_ref = c2_add_ref;
 	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
 	dev->ibdev.iwcm->get_qp = c2_get_qp;
@@ -862,23 +866,25 @@
 
 	ret = ib_register_device(&dev->ibdev);
 	if (ret)
-		goto out1;
+		goto out_free_iwcm;
 
 	for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
 		ret = device_create_file(&dev->ibdev.dev,
 					       c2_dev_attributes[i]);
 		if (ret)
-			goto out0;
+			goto out_unregister_ibdev;
 	}
-	goto out3;
+	goto out;
 
-out0:
+out_unregister_ibdev:
 	ib_unregister_device(&dev->ibdev);
-out1:
+out_free_iwcm:
+	kfree(dev->ibdev.iwcm);
+out_unregister_netdev:
 	unregister_netdev(dev->pseudo_netdev);
-out2:
+out_free_netdev:
 	free_netdev(dev->pseudo_netdev);
-out3:
+out:
 	pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
 	return ret;
 }
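
The c2_provider rework above is the canonical kernel error-unwind ladder: each
acquisition gets a label named for what it undoes, a failure jumps to the
label that tears down everything acquired so far, and success falls through
past all of them. The missing iwcm allocation check slots into the same
ladder. The shape of the pattern, as plain C with stand-in resources:

    #include <errno.h>
    #include <stdlib.h>

    struct res { int dummy; };

    static struct res *acquire(void) { return malloc(sizeof(struct res)); }
    static void release(struct res *r) { free(r); }

    static int setup(void)
    {
        struct res *a, *b, *c;
        int ret = -ENOMEM;

        a = acquire();
        if (!a)
            goto out;
        b = acquire();
        if (!b)
            goto out_release_a;    /* label names what it undoes */
        c = acquire();
        if (!c)
            goto out_release_b;

        return 0;    /* success keeps everything and skips the unwind */

    out_release_b:
        release(b);
    out_release_a:
        release(a);
    out:
        return ret;
    }

    int main(void) { return setup() ? 1 : 0; }
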
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 62f9cf2..72ed339 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -852,7 +852,9 @@
 	wqe->qpcaps = attr->qpcaps;
 	wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
 	wqe->rqe_count = cpu_to_be16(attr->rqe_count);
-	wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
+	wqe->flags_rtr_type = cpu_to_be16(attr->flags |
+					  V_RTR_TYPE(attr->rtr_type) |
+					  V_CHAN(attr->chan));
 	wqe->ord = cpu_to_be32(attr->ord);
 	wqe->ird = cpu_to_be32(attr->ird);
 	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
@@ -1032,6 +1034,7 @@
 err2:
 	cxio_hal_destroy_ctrl_qp(rdev_p);
 err1:
+	rdev_p->t3cdev_p->ulp = NULL;
 	list_del(&rdev_p->entry);
 	return err;
 }
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 32e3b14..a197a5b 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -327,6 +327,11 @@
 #define V_RTR_TYPE(x)	((x) << S_RTR_TYPE)
 #define G_RTR_TYPE(x)	((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
 
+#define S_CHAN		4
+#define M_CHAN		0x3
+#define V_CHAN(x)	((x) << S_CHAN)
+#define G_CHAN(x)	((((x) >> S_CHAN)) & M_CHAN)
+
 struct t3_rdma_init_attr {
 	u32 tid;
 	u32 qpid;
@@ -346,6 +351,7 @@
 	u16 flags;
 	u16 rqe_count;
 	u32 irs;
+	u32 chan;
 };
 
 struct t3_rdma_init_wr {
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 26fc0a4..b0ea010 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -51,7 +51,7 @@
 
 static void open_rnic_dev(struct t3cdev *);
 static void close_rnic_dev(struct t3cdev *);
-static void iwch_err_handler(struct t3cdev *, u32, u32);
+static void iwch_event_handler(struct t3cdev *, u32, u32);
 
 struct cxgb3_client t3c_client = {
 	.name = "iw_cxgb3",
@@ -59,7 +59,7 @@
 	.remove = close_rnic_dev,
 	.handlers = t3c_handlers,
 	.redirect = iwch_ep_redirect,
-	.err_handler = iwch_err_handler
+	.event_handler = iwch_event_handler
 };
 
 static LIST_HEAD(dev_list);
@@ -105,11 +105,9 @@
 static void open_rnic_dev(struct t3cdev *tdev)
 {
 	struct iwch_dev *rnicp;
-	static int vers_printed;
 
 	PDBG("%s t3cdev %p\n", __func__,  tdev);
-	if (!vers_printed++)
-		printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
+	printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
 		       DRV_VERSION);
 	rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
 	if (!rnicp) {
@@ -162,21 +160,36 @@
 	mutex_unlock(&dev_mutex);
 }
 
-static void iwch_err_handler(struct t3cdev *tdev, u32 status, u32 error)
+static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
 {
 	struct cxio_rdev *rdev = tdev->ulp;
-	struct iwch_dev *rnicp = rdev_to_iwch_dev(rdev);
+	struct iwch_dev *rnicp;
 	struct ib_event event;
+	u32    portnum = port_id + 1;
 
-	if (status == OFFLOAD_STATUS_DOWN) {
+	if (!rdev)
+		return;
+	rnicp = rdev_to_iwch_dev(rdev);
+	switch (evt) {
+	case OFFLOAD_STATUS_DOWN: {
 		rdev->flags = CXIO_ERROR_FATAL;
-
-		event.device = &rnicp->ibdev;
 		event.event  = IB_EVENT_DEVICE_FATAL;
-		event.element.port_num = 0;
-		ib_dispatch_event(&event);
+		break;
+		}
+	case OFFLOAD_PORT_DOWN: {
+		event.event  = IB_EVENT_PORT_ERR;
+		break;
+		}
+	case OFFLOAD_PORT_UP: {
+		event.event  = IB_EVENT_PORT_ACTIVE;
+		break;
+		}
 	}
 
+	event.device = &rnicp->ibdev;
+	event.element.port_num = portnum;
+	ib_dispatch_event(&event);
+
 	return;
 }
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 52d7bb0..66b4135 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -286,7 +286,7 @@
 	ep = container_of(container_of(kref, struct iwch_ep_common, kref),
 			  struct iwch_ep, com);
 	PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
-	if (ep->com.flags & RELEASE_RESOURCES) {
+	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
 		cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
 		dst_release(ep->dst);
 		l2t_release(L2DATA(ep->com.tdev), ep->l2t);
@@ -297,7 +297,7 @@
 static void release_ep_resources(struct iwch_ep *ep)
 {
 	PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
-	ep->com.flags |= RELEASE_RESOURCES;
+	set_bit(RELEASE_RESOURCES, &ep->com.flags);
 	put_ep(&ep->com);
 }
 
@@ -786,10 +786,12 @@
 	event.private_data_len = ep->plen;
 	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	event.provider_data = ep;
-	if (state_read(&ep->parent_ep->com) != DEAD)
+	if (state_read(&ep->parent_ep->com) != DEAD) {
+		get_ep(&ep->com);
 		ep->parent_ep->com.cm_id->event_handler(
 						ep->parent_ep->com.cm_id,
 						&event);
+	}
 	put_ep(&ep->parent_ep->com);
 	ep->parent_ep = NULL;
 }
@@ -1156,8 +1158,7 @@
 	 * We get 2 abort replies from the HW.  The first one must
 	 * be ignored except for scribbling that we need one more.
 	 */
-	if (!(ep->com.flags & ABORT_REQ_IN_PROGRESS)) {
-		ep->com.flags |= ABORT_REQ_IN_PROGRESS;
+	if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
 		return CPL_RET_BUF_DONE;
 	}
 
@@ -1477,10 +1478,14 @@
 		/*
 		 * We're gonna mark this puppy DEAD, but keep
 		 * the reference on it until the ULP accepts or
-		 * rejects the CR.
+		 * rejects the CR. Also wake up anyone waiting
+		 * in rdma connection migration (see iwch_accept_cr()).
 		 */
 		__state_set(&ep->com, CLOSING);
-		get_ep(&ep->com);
+		ep->com.rpl_done = 1;
+		ep->com.rpl_err = -ECONNRESET;
+		PDBG("waking up ep %p\n", ep);
+		wake_up(&ep->com.waitq);
 		break;
 	case MPA_REP_SENT:
 		__state_set(&ep->com, CLOSING);
@@ -1561,8 +1566,7 @@
 	 * We get 2 peer aborts from the HW.  The first one must
 	 * be ignored except for scribbling that we need one more.
 	 */
-	if (!(ep->com.flags & PEER_ABORT_IN_PROGRESS)) {
-		ep->com.flags |= PEER_ABORT_IN_PROGRESS;
+	if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
 		return CPL_RET_BUF_DONE;
 	}
 
@@ -1589,9 +1593,13 @@
 		/*
 		 * We're gonna mark this puppy DEAD, but keep
 		 * the reference on it until the ULP accepts or
-		 * rejects the CR.
+		 * rejects the CR. Also wake up anyone waiting
+		 * in rdma connection migration (see iwch_accept_cr()).
 		 */
-		get_ep(&ep->com);
+		ep->com.rpl_done = 1;
+		ep->com.rpl_err = -ECONNRESET;
+		PDBG("waking up ep %p\n", ep);
+		wake_up(&ep->com.waitq);
 		break;
 	case MORIBUND:
 	case CLOSING:
@@ -1797,6 +1805,7 @@
 		err = send_mpa_reject(ep, pdata, pdata_len);
 		err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
 	}
+	put_ep(&ep->com);
 	return 0;
 }
 
@@ -1810,8 +1819,10 @@
 	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
 
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-	if (state_read(&ep->com) == DEAD)
-		return -ECONNRESET;
+	if (state_read(&ep->com) == DEAD) {
+		err = -ECONNRESET;
+		goto err;
+	}
 
 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
 	BUG_ON(!qp);
@@ -1819,15 +1830,14 @@
 	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
 	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
 		abort_connection(ep, NULL, GFP_KERNEL);
-		return -EINVAL;
+		err = -EINVAL;
+		goto err;
 	}
 
 	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
 	ep->com.qp = qp;
 
-	ep->com.rpl_done = 0;
-	ep->com.rpl_err = 0;
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 
@@ -1836,8 +1846,6 @@
 
 	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
 
-	get_ep(&ep->com);
-
 	/* bind QP to EP and move to RTS */
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
@@ -1855,30 +1863,31 @@
 	err = iwch_modify_qp(ep->com.qp->rhp,
 			     ep->com.qp, mask, &attrs, 1);
 	if (err)
-		goto err;
+		goto err1;
 
 	/* if needed, wait for wr_ack */
 	if (iwch_rqes_posted(qp)) {
 		wait_event(ep->com.waitq, ep->com.rpl_done);
 		err = ep->com.rpl_err;
 		if (err)
-			goto err;
+			goto err1;
 	}
 
 	err = send_mpa_reply(ep, conn_param->private_data,
 			     conn_param->private_data_len);
 	if (err)
-		goto err;
+		goto err1;
 
 
 	state_set(&ep->com, FPDU_MODE);
 	established_upcall(ep);
 	put_ep(&ep->com);
 	return 0;
-err:
+err1:
 	ep->com.cm_id = NULL;
 	ep->com.qp = NULL;
 	cm_id->rem_ref(cm_id);
+err:
 	put_ep(&ep->com);
 	return err;
 }
@@ -2097,14 +2106,17 @@
 			ep->com.state = CLOSING;
 			start_ep_timer(ep);
 		}
+		set_bit(CLOSE_SENT, &ep->com.flags);
 		break;
 	case CLOSING:
-		close = 1;
-		if (abrupt) {
-			stop_ep_timer(ep);
-			ep->com.state = ABORTING;
-		} else
-			ep->com.state = MORIBUND;
+		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
+			close = 1;
+			if (abrupt) {
+				stop_ep_timer(ep);
+				ep->com.state = ABORTING;
+			} else
+				ep->com.state = MORIBUND;
+		}
 		break;
 	case MORIBUND:
 	case ABORTING:
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 43c0aea..b9efadf 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -145,9 +145,10 @@
 };
 
 enum iwch_ep_flags {
-	PEER_ABORT_IN_PROGRESS	= (1 << 0),
-	ABORT_REQ_IN_PROGRESS	= (1 << 1),
-	RELEASE_RESOURCES	= (1 << 2),
+	PEER_ABORT_IN_PROGRESS	= 0,
+	ABORT_REQ_IN_PROGRESS	= 1,
+	RELEASE_RESOURCES	= 2,
+	CLOSE_SENT		= 3,
 };
 
 struct iwch_ep_common {
@@ -162,7 +163,7 @@
 	wait_queue_head_t waitq;
 	int rpl_done;
 	int rpl_err;
-	u32 flags;
+	unsigned long flags;
 };
 
 struct iwch_listen_ep {
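
The iwch_ep_flags conversion above changes the enum from precomputed masks
(1 << n) to plain bit numbers because the driver now manipulates the field
with the atomic bitops -- set_bit(), test_bit(), test_and_set_bit() on an
unsigned long -- which take a bit index, not a mask. The test_and_set variant
also collapses the racy "check, then set" pairs in the abort handlers into one
atomic step. The same idea with C11 atomics on a toy flag word:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    enum { PEER_ABORT_IN_PROGRESS = 0, RELEASE_RESOURCES = 2 };  /* bit nrs */

    static atomic_ulong flags;

    /* Returns the previous value of the bit while setting it atomically --
     * the shape of the kernel's test_and_set_bit(). */
    static bool test_and_set(int nr)
    {
        unsigned long mask = 1UL << nr;

        return atomic_fetch_or(&flags, mask) & mask;
    }

    int main(void)
    {
        /* First abort reply: bit was clear, remember it and bail out. */
        if (!test_and_set(PEER_ABORT_IN_PROGRESS))
            printf("first abort, waiting for the second\n");
        /* Second reply: bit already set, handle it for real. */
        if (test_and_set(PEER_ABORT_IN_PROGRESS))
            printf("second abort, handling it\n");
        return 0;
    }
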
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index ec49a5c..e1ec65e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -39,7 +39,7 @@
 #include "iwch.h"
 #include "iwch_provider.h"
 
-static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
+static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
 {
 	u32 mmid;
 
@@ -47,14 +47,15 @@
 	mhp->attr.stag = stag;
 	mmid = stag >> 8;
 	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
 	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+	return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
 }
 
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
 		      struct iwch_mr *mhp, int shift)
 {
 	u32 stag;
+	int ret;
 
 	if (cxio_register_phys_mem(&rhp->rdev,
 				   &stag, mhp->attr.pdid,
@@ -66,9 +67,11 @@
 				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
 		return -ENOMEM;
 
-	iwch_finish_mem_reg(mhp, stag);
-
-	return 0;
+	ret = iwch_finish_mem_reg(mhp, stag);
+	if (ret)
+		cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+		       mhp->attr.pbl_addr);
+	return ret;
 }
 
 int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
@@ -77,6 +80,7 @@
 					int npages)
 {
 	u32 stag;
+	int ret;
 
 	/* We could support this... */
 	if (npages > mhp->attr.pbl_size)
@@ -93,9 +97,12 @@
 				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
 		return -ENOMEM;
 
-	iwch_finish_mem_reg(mhp, stag);
+	ret = iwch_finish_mem_reg(mhp, stag);
+	if (ret)
+		cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+		       mhp->attr.pbl_addr);
 
-	return 0;
+	return ret;
 }
 
 int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index e2a6321..6895523 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -195,7 +195,11 @@
 	spin_lock_init(&chp->lock);
 	atomic_set(&chp->refcnt, 1);
 	init_waitqueue_head(&chp->wait);
-	insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+	if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
+		cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+		kfree(chp);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	if (ucontext) {
 		struct iwch_mm_entry *mm;
@@ -750,7 +754,11 @@
 	mhp->attr.stag = stag;
 	mmid = (stag) >> 8;
 	mhp->ibmw.rkey = stag;
-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+	if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+		cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+		kfree(mhp);
+		return ERR_PTR(-ENOMEM);
+	}
 	PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
 	return &(mhp->ibmw);
 }
@@ -778,37 +786,43 @@
 	struct iwch_mr *mhp;
 	u32 mmid;
 	u32 stag = 0;
-	int ret;
+	int ret = 0;
 
 	php = to_iwch_pd(pd);
 	rhp = php->rhp;
 	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
 	if (!mhp)
-		return ERR_PTR(-ENOMEM);
+		goto err;
 
 	mhp->rhp = rhp;
 	ret = iwch_alloc_pbl(mhp, pbl_depth);
-	if (ret) {
-		kfree(mhp);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err1;
 	mhp->attr.pbl_size = pbl_depth;
 	ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
 				 mhp->attr.pbl_size, mhp->attr.pbl_addr);
-	if (ret) {
-		iwch_free_pbl(mhp);
-		kfree(mhp);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err2;
 	mhp->attr.pdid = php->pdid;
 	mhp->attr.type = TPT_NON_SHARED_MR;
 	mhp->attr.stag = stag;
 	mhp->attr.state = 1;
 	mmid = (stag) >> 8;
 	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+	if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
+		goto err3;
+
 	PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
 	return &(mhp->ibmr);
+err3:
+	cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
+		       mhp->attr.pbl_addr);
+err2:
+	iwch_free_pbl(mhp);
+err1:
+	kfree(mhp);
+err:
+	return ERR_PTR(ret);
 }
 
 static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
@@ -961,7 +975,13 @@
 	spin_lock_init(&qhp->lock);
 	init_waitqueue_head(&qhp->wait);
 	atomic_set(&qhp->refcnt, 1);
-	insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+	if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
+		cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+		kfree(qhp);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	if (udata) {
 
@@ -1418,6 +1438,7 @@
 bail2:
 	ib_unregister_device(&dev->ibdev);
 bail1:
+	kfree(dev->ibdev.iwcm);
 	return ret;
 }
 
@@ -1430,5 +1451,6 @@
 		device_remove_file(&dev->ibdev.dev,
 				   iwch_class_attributes[i]);
 	ib_unregister_device(&dev->ibdev);
+	kfree(dev->ibdev.iwcm);
 	return;
 }
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 27bbdc8..6e86534 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -889,6 +889,7 @@
 	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
 	init_attr.rqe_count = iwch_rqes_posted(qhp);
 	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
+	init_attr.chan = qhp->ep->l2t->smt_idx;
 	if (peer2peer) {
 		init_attr.rtr_type = RTR_READ;
 		if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index fab18a2..5b635aa 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
 #include "ehca_tools.h"
 #include "hcp_if.h"
 
-#define HCAD_VERSION "0028"
+#define HCAD_VERSION "0029"
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
@@ -64,7 +64,7 @@
 static int ehca_poll_all_eqs  = 1;
 
 int ehca_debug_level   = 0;
-int ehca_nr_ports      = 2;
+int ehca_nr_ports      = -1;
 int ehca_use_hp_mr     = 0;
 int ehca_port_act_time = 30;
 int ehca_static_rate   = -1;
@@ -95,8 +95,8 @@
 		 "Hardware level (0: autosensing (default), "
 		 "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
 MODULE_PARM_DESC(nr_ports,
-		 "number of connected ports (-1: autodetect, 1: port one only, "
-		 "2: two ports (default)");
+		 "number of connected ports (-1: autodetect (default), "
+		 "1: port one only, 2: two ports)");
 MODULE_PARM_DESC(use_hp_mr,
 		 "Use high performance MRs (default: no)");
 MODULE_PARM_DESC(port_act_time,
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 5a3d96f..8fd88cd 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -786,7 +786,11 @@
 	wc->slid = cqe->rlid;
 	wc->dlid_path_bits = cqe->dlid;
 	wc->src_qp = cqe->remote_qp_number;
-	wc->wc_flags = cqe->w_completion_flags;
+	/*
+	 * HW has "Immed data present" and "GRH present" in bits 6 and 5.
+	 * SW defines those in bits 1 and 0, so we can just shift and mask.
+	 */
+	wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
 	wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
 	wc->sl = cqe->service_level;
 
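The ehca fix above translates between two flag layouts: hardware reports
"immediate data present" and "GRH present" in bits 6 and 5 of
w_completion_flags, while struct ib_wc expects them in bits 1 and 0, so
shifting right by 5 and masking with 3 relocates both flags at once. Spelled
out with hypothetical values:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t hw = (1u << 6) | (1u << 5);    /* both flags set by HW */

        /* bits 6..5 -> bits 1..0, everything else dropped */
        assert(((hw >> 5) & 3) == 3);
        assert((((1u << 6) >> 5) & 3) == 2);    /* immediate data only */
        assert((((1u << 5) >> 5) & 3) == 1);    /* GRH only */
        return 0;
    }
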
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index c568b28..8c1213f 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -125,14 +125,30 @@
 	u8 data[192];
 } __attribute__ ((packed));
 
+/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
+struct tcslfl {
+	u32 tc:8;
+	u32 sl:4;
+	u32 fl:20;
+} __attribute__ ((packed));
+
+/* IP Version/TC/FL packed into 32 bits, as in GRH */
+struct vertcfl {
+	u32 ver:4;
+	u32 tc:8;
+	u32 fl:20;
+} __attribute__ ((packed));
 
 static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
+			     struct ib_wc *in_wc, struct ib_grh *in_grh,
 			     struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
 	struct ib_perf *in_perf = (struct ib_perf *)in_mad;
 	struct ib_perf *out_perf = (struct ib_perf *)out_mad;
 	struct ib_class_port_info *poi =
 		(struct ib_class_port_info *)out_perf->data;
+	struct tcslfl *tcslfl =
+		(struct tcslfl *)&poi->redirect_tcslfl;
 	struct ehca_shca *shca =
 		container_of(ibdev, struct ehca_shca, ib_device);
 	struct ehca_sport *sport = &shca->sport[port_num - 1];
@@ -158,10 +174,29 @@
 		poi->base_version = 1;
 		poi->class_version = 1;
 		poi->resp_time_value = 18;
-		poi->redirect_lid = sport->saved_attr.lid;
-		poi->redirect_qp = sport->pma_qp_nr;
+
+		/* copy local routing information from WC where applicable */
+		tcslfl->sl         = in_wc->sl;
+		poi->redirect_lid  =
+			sport->saved_attr.lid | in_wc->dlid_path_bits;
+		poi->redirect_qp   = sport->pma_qp_nr;
 		poi->redirect_qkey = IB_QP1_QKEY;
-		poi->redirect_pkey = IB_DEFAULT_PKEY_FULL;
+
+		ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
+				&poi->redirect_pkey);
+
+		/* if request was globally routed, copy route info */
+		if (in_grh) {
+			struct vertcfl *vertcfl =
+				(struct vertcfl *)&in_grh->version_tclass_flow;
+			memcpy(poi->redirect_gid, in_grh->dgid.raw,
+			       sizeof(poi->redirect_gid));
+			tcslfl->tc        = vertcfl->tc;
+			tcslfl->fl        = vertcfl->fl;
+		} else
+			/* else only fill in default GID */
+			ehca_query_gid(ibdev, port_num, 0,
+				       (union ib_gid *)&poi->redirect_gid);
 
 		ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
 			 sport->saved_attr.lid, sport->pma_qp_nr);
@@ -183,8 +218,7 @@
 
 int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 		     struct ib_wc *in_wc, struct ib_grh *in_grh,
-		     struct ib_mad *in_mad,
-		     struct ib_mad *out_mad)
+		     struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
 	int ret;
 
@@ -196,7 +230,8 @@
 		return IB_MAD_RESULT_SUCCESS;
 
 	ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
-	ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad);
+	ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
+				in_mad, out_mad);
 
 	return ret;
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 2317398..38a2870 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1616,7 +1616,7 @@
 		pd->port_cnt = 1;
 		port_fp(fp) = pd;
 		pd->port_pid = get_pid(task_pid(current));
-		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
+		strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
 		ipath_stats.sps_ports++;
 		ret = 0;
 	} else
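
The two ipath string changes fix different hazards: strncpy() does not
NUL-terminate when the source fills the buffer, so the port_comm copy switches
to strlcpy(), which always terminates; node_desc, by contrast, is a fixed
64-byte IB field where a full, possibly unterminated copy is exactly what the
wire format wants, hence memcpy() in the next hunk. A strlcpy in miniature,
since userspace libcs often lack it (assumption: truncating semantics as in
the kernel's lib/string.c):

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    /* Copies at most size-1 bytes, always terminates, and returns the
     * length of src, so ret >= size signals truncation. */
    static size_t my_strlcpy(char *dst, const char *src, size_t size)
    {
        size_t len = strlen(src);

        if (size) {
            size_t n = len >= size ? size - 1 : len;

            memcpy(dst, src, n);
            dst[n] = '\0';
        }
        return len;
    }

    int main(void)
    {
        char comm[8];

        strncpy(comm, "long-task-name", sizeof(comm));
        /* comm is now NOT NUL-terminated -- the bug being fixed */

        assert(my_strlcpy(comm, "long-task-name", sizeof(comm))
               >= sizeof(comm));        /* truncation detected */
        assert(comm[7] == '\0');        /* always terminated */
        printf("%s\n", comm);
        return 0;
    }
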
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 16a702d..ceb98ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -60,7 +60,7 @@
 	if (smp->attr_mod)
 		smp->status |= IB_SMP_INVALID_FIELD;
 
-	strncpy(smp->data, ibdev->node_desc, sizeof(smp->data));
+	memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
 
 	return reply(smp);
 }
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ae3d759..3cb3f47 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -342,6 +342,9 @@
 	struct mlx4_ib_alloc_ucontext_resp resp;
 	int err;
 
+	if (!dev->ib_active)
+		return ERR_PTR(-EAGAIN);
+
 	resp.qp_tab_size      = dev->dev->caps.num_qps;
 	resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
 	resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
@@ -540,15 +543,11 @@
 
 static void *mlx4_ib_add(struct mlx4_dev *dev)
 {
-	static int mlx4_ib_version_printed;
 	struct mlx4_ib_dev *ibdev;
 	int num_ports = 0;
 	int i;
 
-	if (!mlx4_ib_version_printed) {
-		printk(KERN_INFO "%s", mlx4_ib_version);
-		++mlx4_ib_version_printed;
-	}
+	printk_once(KERN_INFO "%s", mlx4_ib_version);
 
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 		num_ports++;
@@ -673,6 +672,8 @@
 			goto err_reg;
 	}
 
+	ibdev->ib_active = true;
+
 	return ibdev;
 
 err_reg:
@@ -729,6 +730,7 @@
 		break;
 
 	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+		ibdev->ib_active = false;
 		ibev.event = IB_EVENT_DEVICE_FATAL;
 		break;
 
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 8a7dd67..3486d76 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -175,6 +175,7 @@
 	spinlock_t		sm_lock;
 
 	struct mutex		cap_mask_mutex;
+	bool			ib_active;
 };
 
 static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c4a0264..219b103 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -615,10 +615,12 @@
 }
 
 static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
 {
-	if (send_cq == recv_cq)
+	if (send_cq == recv_cq) {
 		spin_lock_irq(&send_cq->lock);
-	else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+		__acquire(&recv_cq->lock);
+	} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
 		spin_lock_irq(&send_cq->lock);
 		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
 	} else {
@@ -628,10 +630,12 @@
 }
 
 static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+	__releases(&send_cq->lock) __releases(&recv_cq->lock)
 {
-	if (send_cq == recv_cq)
+	if (send_cq == recv_cq) {
+		__release(&recv_cq->lock);
 		spin_unlock_irq(&send_cq->lock);
-	else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+	} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
 		spin_unlock(&recv_cq->lock);
 		spin_unlock_irq(&send_cq->lock);
 	} else {
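
Both mlx4 here and mthca further down use the same double-lock discipline:
when the send and receive CQs differ, they are always locked in ascending cqn
order so that two threads locking the same pair can never deadlock, and when
they are one object the lock is taken exactly once. The new
__acquires/__acquire annotations only teach sparse that both locks are
conceptually held in every branch. The ordering rule as a pthread sketch,
where id plays the role of cqn:

    #include <pthread.h>

    struct cq {
        int id;                 /* stands in for cqn */
        pthread_mutex_t lock;
    };

    /* Lock two CQs without ABBA deadlock: same object -> lock once,
     * otherwise always take the smaller id first. */
    static void lock_cqs(struct cq *a, struct cq *b)
    {
        if (a == b) {
            pthread_mutex_lock(&a->lock);
        } else if (a->id < b->id) {
            pthread_mutex_lock(&a->lock);
            pthread_mutex_lock(&b->lock);
        } else {
            pthread_mutex_lock(&b->lock);
            pthread_mutex_lock(&a->lock);
        }
    }

    static void unlock_cqs(struct cq *a, struct cq *b)
    {
        if (a != b)
            pthread_mutex_unlock(&b->lock);
        pthread_mutex_unlock(&a->lock);
    }

    int main(void)
    {
        struct cq x = { 1, PTHREAD_MUTEX_INITIALIZER };
        struct cq y = { 2, PTHREAD_MUTEX_INITIALIZER };

        lock_cqs(&y, &x);    /* same lock order regardless of argument order */
        unlock_cqs(&y, &x);
        return 0;
    }
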
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index 65ad359..056b2a4 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -88,6 +88,7 @@
 	event.device = &dev->ib_dev;
 	event.event  = IB_EVENT_DEVICE_FATAL;
 	event.element.port_num = 0;
+	dev->active = false;
 
 	ib_dispatch_event(&event);
 
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
index 75671f7..155bc66 100644
--- a/drivers/infiniband/hw/mthca/mthca_config_reg.h
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -34,8 +34,6 @@
 #ifndef MTHCA_CONFIG_REG_H
 #define MTHCA_CONFIG_REG_H
 
-#include <asm/page.h>
-
 #define MTHCA_HCR_BASE         0x80680
 #define MTHCA_HCR_SIZE         0x0001c
 #define MTHCA_ECR_BASE         0x80700
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 9ef611f..7e6a6d6 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -357,6 +357,7 @@
 	struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
 	spinlock_t            sm_lock;
 	u8                    rate[MTHCA_MAX_PORTS];
+	bool		      active;
 };
 
 #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 90e4e45..8c31fa3 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -829,27 +829,34 @@
 
 	if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
 		static const char *eq_name[] = {
-			[MTHCA_EQ_COMP]  = DRV_NAME " (comp)",
-			[MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
-			[MTHCA_EQ_CMD]   = DRV_NAME " (cmd)"
+			[MTHCA_EQ_COMP]  = DRV_NAME "-comp",
+			[MTHCA_EQ_ASYNC] = DRV_NAME "-async",
+			[MTHCA_EQ_CMD]   = DRV_NAME "-cmd"
 		};
 
 		for (i = 0; i < MTHCA_NUM_EQ; ++i) {
+			snprintf(dev->eq_table.eq[i].irq_name,
+				 IB_DEVICE_NAME_MAX,
+				 "%s@pci:%s", eq_name[i],
+				 pci_name(dev->pdev));
 			err = request_irq(dev->eq_table.eq[i].msi_x_vector,
 					  mthca_is_memfree(dev) ?
 					  mthca_arbel_msi_x_interrupt :
 					  mthca_tavor_msi_x_interrupt,
-					  0, eq_name[i], dev->eq_table.eq + i);
+					  0, dev->eq_table.eq[i].irq_name,
+					  dev->eq_table.eq + i);
 			if (err)
 				goto err_out_cmd;
 			dev->eq_table.eq[i].have_irq = 1;
 		}
 	} else {
+		snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
+			 DRV_NAME "@pci:%s", pci_name(dev->pdev));
 		err = request_irq(dev->pdev->irq,
 				  mthca_is_memfree(dev) ?
 				  mthca_arbel_interrupt :
 				  mthca_tavor_interrupt,
-				  IRQF_SHARED, DRV_NAME, dev);
+				  IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
 		if (err)
 			goto err_out_cmd;
 		dev->eq_table.have_irq = 1;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 13da9f1..b01b2898 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1116,6 +1116,8 @@
 	pci_set_drvdata(pdev, mdev);
 	mdev->hca_type = hca_type;
 
+	mdev->active = true;
+
 	return 0;
 
 err_unregister:
@@ -1215,15 +1217,11 @@
 static int __devinit mthca_init_one(struct pci_dev *pdev,
 				    const struct pci_device_id *id)
 {
-	static int mthca_version_printed = 0;
 	int ret;
 
 	mutex_lock(&mthca_device_mutex);
 
-	if (!mthca_version_printed) {
-		printk(KERN_INFO "%s", mthca_version);
-		++mthca_version_printed;
-	}
+	printk_once(KERN_INFO "%s", mthca_version);
 
 	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
 		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 87ad889..bcf7a40 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -334,6 +334,9 @@
 	struct mthca_ucontext           *context;
 	int                              err;
 
+	if (!(to_mdev(ibdev)->active))
+		return ERR_PTR(-EAGAIN);
+
 	memset(&uresp, 0, sizeof uresp);
 
 	uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index c621f87..90f4c4d 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -113,6 +113,7 @@
 	int                    nent;
 	struct mthca_buf_list *page_list;
 	struct mthca_mr        mr;
+	char		       irq_name[IB_DEVICE_NAME_MAX];
 };
 
 struct mthca_av;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index f5081bf..c10576f 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1319,10 +1319,12 @@
 }
 
 static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
 {
-	if (send_cq == recv_cq)
+	if (send_cq == recv_cq) {
 		spin_lock_irq(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+		__acquire(&recv_cq->lock);
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		spin_lock_irq(&send_cq->lock);
 		spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
 	} else {
@@ -1332,10 +1334,12 @@
 }
 
 static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+	__releases(&send_cq->lock) __releases(&recv_cq->lock)
 {
-	if (send_cq == recv_cq)
+	if (send_cq == recv_cq) {
+		__release(&recv_cq->lock);
 		spin_unlock_irq(&send_cq->lock);
-	else if (send_cq->cqn < recv_cq->cqn) {
+	} else if (send_cq->cqn < recv_cq->cqn) {
 		spin_unlock(&recv_cq->lock);
 		spin_unlock_irq(&send_cq->lock);
 	} else {
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index acb6817f..2a13a16 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bf1720f..bcc6abc 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -523,7 +523,7 @@
 void nes_cm_disconn_worker(void *);
 
 /* nes_verbs.c */
-int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32);
+int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32);
 int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
 struct nes_ib_device *nes_init_ofa_device(struct net_device *);
 void nes_destroy_ofa_device(struct nes_ib_device *);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 114b802..73473db 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2450,19 +2450,16 @@
  */
 int nes_cm_disconn(struct nes_qp *nesqp)
 {
-	unsigned long flags;
+	struct disconn_work *work;
 
-	spin_lock_irqsave(&nesqp->lock, flags);
-	if (nesqp->disconn_pending == 0) {
-		nesqp->disconn_pending++;
-		spin_unlock_irqrestore(&nesqp->lock, flags);
-		/* init our disconnect work element, to */
-		INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker);
+	work = kzalloc(sizeof *work, GFP_ATOMIC);
+	if (!work)
+		return -ENOMEM; /* Timer will clean up */
 
-		queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work);
-	} else
-		spin_unlock_irqrestore(&nesqp->lock, flags);
-
+	nes_add_ref(&nesqp->ibqp);
+	work->nesqp = nesqp;
+	INIT_WORK(&work->work, nes_disconnect_worker);
+	queue_work(g_cm_core->disconn_wq, &work->work);
 	return 0;
 }
 
@@ -2472,11 +2469,14 @@
  */
 static void nes_disconnect_worker(struct work_struct *work)
 {
-	struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work);
+	struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+	struct nes_qp *nesqp = dwork->nesqp;
 
+	kfree(dwork);
 	nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
 			nesqp->last_aeq, nesqp->hwqp.qp_id);
 	nes_cm_disconn_true(nesqp);
+	nes_rem_ref(&nesqp->ibqp);
 }
 
 
@@ -2493,7 +2493,12 @@
 	u16 last_ae;
 	u8 original_hw_tcp_state;
 	u8 original_ibqp_state;
-	u8 issued_disconnect_reset = 0;
+	enum iw_cm_event_status disconn_status = IW_CM_EVENT_STATUS_OK;
+	int issue_disconn = 0;
+	int issue_close = 0;
+	int issue_flush = 0;
+	u32 flush_q = NES_CQP_FLUSH_RQ;
+	struct ib_event ibevent;
 
 	if (!nesqp) {
 		nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
@@ -2517,24 +2522,55 @@
 	original_ibqp_state   = nesqp->ibqp_state;
 	last_ae = nesqp->last_aeq;
 
+	if (nesqp->term_flags) {
+		issue_disconn = 1;
+		issue_close = 1;
+		nesqp->cm_id = NULL;
+		if (nesqp->flush_issued == 0) {
+			nesqp->flush_issued = 1;
+			issue_flush = 1;
+		}
+	} else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+			((original_ibqp_state == IB_QPS_RTS) &&
+			(last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+		issue_disconn = 1;
+		if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET)
+			disconn_status = IW_CM_EVENT_STATUS_RESET;
+	}
 
-	nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state);
+	if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
+		 (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
+		 (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
+		 (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+		issue_close = 1;
+		nesqp->cm_id = NULL;
+		if (nesqp->flush_issued == 0) {
+			nesqp->flush_issued = 1;
+			issue_flush = 1;
+		}
+	}
 
-	if ((nesqp->cm_id) && (cm_id->event_handler)) {
-		if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
-				((original_ibqp_state == IB_QPS_RTS) &&
-				(last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+	spin_unlock_irqrestore(&nesqp->lock, flags);
+
+	if ((issue_flush) && (nesqp->destroyed == 0)) {
+		/* Flush the queue(s) */
+		if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE)
+			flush_q |= NES_CQP_FLUSH_SQ;
+		flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1);
+
+		if (nesqp->term_flags) {
+			ibevent.device = nesqp->ibqp.device;
+			ibevent.event = nesqp->terminate_eventtype;
+			ibevent.element.qp = &nesqp->ibqp;
+			nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+		}
+	}
+
+	if ((cm_id) && (cm_id->event_handler)) {
+		if (issue_disconn) {
 			atomic_inc(&cm_disconnects);
 			cm_event.event = IW_CM_EVENT_DISCONNECT;
-			if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
-				cm_event.status = IW_CM_EVENT_STATUS_RESET;
-				nes_debug(NES_DBG_CM, "Generating a CM "
-					"Disconnect Event (status reset) for "
-					"QP%u, cm_id = %p. \n",
-					nesqp->hwqp.qp_id, cm_id);
-			} else
-				cm_event.status = IW_CM_EVENT_STATUS_OK;
-
+			cm_event.status = disconn_status;
 			cm_event.local_addr = cm_id->local_addr;
 			cm_event.remote_addr = cm_id->remote_addr;
 			cm_event.private_data = NULL;
@@ -2547,29 +2583,14 @@
 				nesqp->hwqp.sq_tail, cm_id,
 				atomic_read(&nesqp->refcount));
 
-			spin_unlock_irqrestore(&nesqp->lock, flags);
 			ret = cm_id->event_handler(cm_id, &cm_event);
 			if (ret)
 				nes_debug(NES_DBG_CM, "OFA CM event_handler "
 					"returned, ret=%d\n", ret);
-			spin_lock_irqsave(&nesqp->lock, flags);
 		}
 
-		nesqp->disconn_pending = 0;
-		/* There might have been another AE while the lock was released */
-		original_hw_tcp_state = nesqp->hw_tcp_state;
-		original_ibqp_state   = nesqp->ibqp_state;
-		last_ae = nesqp->last_aeq;
-
-		if ((issued_disconnect_reset == 0) && (nesqp->cm_id) &&
-				((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
-				 (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
-				 (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
-				 (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+		if (issue_close) {
 			atomic_inc(&cm_closes);
-			nesqp->cm_id = NULL;
-			nesqp->in_disconnect = 0;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
 			nes_disconnect(nesqp, 1);
 
 			cm_id->provider_data = nesqp;
@@ -2588,28 +2609,7 @@
 			}
 
 			cm_id->rem_ref(cm_id);
-
-			spin_lock_irqsave(&nesqp->lock, flags);
-			if (nesqp->flush_issued == 0) {
-				nesqp->flush_issued = 1;
-				spin_unlock_irqrestore(&nesqp->lock, flags);
-				flush_wqes(nesvnic->nesdev, nesqp,
-					NES_CQP_FLUSH_RQ, 1);
-			} else
-				spin_unlock_irqrestore(&nesqp->lock, flags);
-		} else {
-			cm_id = nesqp->cm_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			/* check to see if the inbound reset beat the outbound reset */
-			if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) {
-				nes_debug(NES_DBG_CM, "QP%u: Decing refcount "
-					"due to inbound reset beating the "
-					"outbound reset.\n", nesqp->hwqp.qp_id);
-			}
 		}
-	} else {
-		nesqp->disconn_pending = 0;
-		spin_unlock_irqrestore(&nesqp->lock, flags);
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 8b7e7c0..90e8e4d 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -410,8 +410,6 @@
 int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
 		enum nes_timer_type, int, int);
 
-int nes_cm_disconn(struct nes_qp *);
-
 int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
 int nes_reject(struct iw_cm_id *, const void *, u8);
 int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 4a84d02e..63a1a8e 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -74,6 +74,8 @@
 static void process_critical_error(struct nes_device *nesdev);
 static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
 static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
+static void nes_terminate_timeout(unsigned long context);
+static void nes_terminate_start_timer(struct nes_qp *nesqp);
 
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
 static unsigned char *nes_iwarp_state_str[] = {
@@ -2903,6 +2905,417 @@
 }
 
 
+static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
+{
+	u16 pkt_len;
+
+	if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
+		/* skip over ethernet header */
+		pkt_len = be16_to_cpu(*(u16 *)(pkt + ETH_HLEN - 2));
+		pkt += ETH_HLEN;
+
+		/* Skip over IP and TCP headers */
+		pkt += 4 * (pkt[0] & 0x0f);
+		pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+	}
+	return pkt;
+}
+
+/* Determine if incoming error pkt is rdma layer */
+static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info)
+{
+	u8 *pkt;
+	u16 *mpa;
+	u32 opcode = 0xffffffff;
+
+	if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+		pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+		mpa = (u16 *)locate_mpa(pkt, aeq_info);
+		opcode = be16_to_cpu(mpa[1]) & 0xf;
+	}
+
+	return opcode;
+}
+
+/* Build iWARP terminate header */
+static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info)
+{
+	u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+	u16 ddp_seg_len;
+	int copy_len = 0;
+	u8 is_tagged = 0;
+	u8 flush_code = 0;
+	struct nes_terminate_hdr *termhdr;
+
+	termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase;
+	memset(termhdr, 0, 64);
+
+	if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+
+		/* Use data from offending packet to fill in ddp & rdma hdrs */
+		pkt = locate_mpa(pkt, aeq_info);
+		ddp_seg_len = be16_to_cpu(*(u16 *)pkt);
+		if (ddp_seg_len) {
+			copy_len = 2;
+			termhdr->hdrct = DDP_LEN_FLAG;
+			if (pkt[2] & 0x80) {
+				is_tagged = 1;
+				if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+					copy_len += TERM_DDP_LEN_TAGGED;
+					termhdr->hdrct |= DDP_HDR_FLAG;
+				}
+			} else {
+				if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+					copy_len += TERM_DDP_LEN_UNTAGGED;
+					termhdr->hdrct |= DDP_HDR_FLAG;
+				}
+
+				if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
+					if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
+						copy_len += TERM_RDMA_LEN;
+						termhdr->hdrct |= RDMA_HDR_FLAG;
+					}
+				}
+			}
+		}
+	}
+
+	switch (async_event_id) {
+	case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
+		switch (iwarp_opcode(nesqp, aeq_info)) {
+		case IWARP_OPCODE_WRITE:
+			flush_code = IB_WC_LOC_PROT_ERR;
+			termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+			termhdr->error_code = DDP_TAGGED_INV_STAG;
+			break;
+		default:
+			flush_code = IB_WC_REM_ACCESS_ERR;
+			termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+			termhdr->error_code = RDMAP_INV_STAG;
+		}
+		break;
+	case NES_AEQE_AEID_AMP_INVALID_STAG:
+		flush_code = IB_WC_REM_ACCESS_ERR;
+		termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+		termhdr->error_code = RDMAP_INV_STAG;
+		break;
+	case NES_AEQE_AEID_AMP_BAD_QP:
+		flush_code = IB_WC_LOC_QP_OP_ERR;
+		termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+		termhdr->error_code = DDP_UNTAGGED_INV_QN;
+		break;
+	case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
+	case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
+		switch (iwarp_opcode(nesqp, aeq_info)) {
+		case IWARP_OPCODE_SEND_INV:
+		case IWARP_OPCODE_SEND_SE_INV:
+			flush_code = IB_WC_REM_OP_ERR;
+			termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+			termhdr->error_code = RDMAP_CANT_INV_STAG;
+			break;
+		default:
+			flush_code = IB_WC_REM_ACCESS_ERR;
+			termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+			termhdr->error_code = RDMAP_INV_STAG;
+		}
+		break;
+	case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+		if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) {
+			flush_code = IB_WC_LOC_PROT_ERR;
+			termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+			termhdr->error_code = DDP_TAGGED_BOUNDS;
+		} else {
+			flush_code = IB_WC_REM_ACCESS_ERR;
+			termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+			termhdr->error_code = RDMAP_INV_BOUNDS;
+		}
+		break;
+	case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
+	case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+	case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
+		flush_code = IB_WC_REM_ACCESS_ERR;
+		termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+		termhdr->error_code = RDMAP_ACCESS;
+		break;
+	case NES_AEQE_AEID_AMP_TO_WRAP:
+		flush_code = IB_WC_REM_ACCESS_ERR;
+		termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+		termhdr->error_code = RDMAP_TO_WRAP;
+		break;
+	case NES_AEQE_AEID_AMP_BAD_PD:
+		switch (iwarp_opcode(nesqp, aeq_info)) {
+		case IWARP_OPCODE_WRITE:
+			flush_code = IB_WC_LOC_PROT_ERR;
+			termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+			termhdr->error_code = DDP_TAGGED_UNASSOC_STAG;
+			break;
+		case IWARP_OPCODE_SEND_INV:
+		case IWARP_OPCODE_SEND_SE_INV:
+			flush_code = IB_WC_REM_ACCESS_ERR;
+			termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+			termhdr->error_code = RDMAP_CANT_INV_STAG;
+			break;
+		default:
+			flush_code = IB_WC_REM_ACCESS_ERR;
+			termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+			termhdr->error_code = RDMAP_UNASSOC_STAG;
+		}
+		break;
+	case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+		flush_code = IB_WC_LOC_LEN_ERR;
+		termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
+		termhdr->error_code = MPA_MARKER;
+		break;
+	case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+		flush_code = IB_WC_GENERAL_ERR;
+		termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
+		termhdr->error_code = MPA_CRC;
+		break;
+	case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
+	case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
+		flush_code = IB_WC_LOC_LEN_ERR;
+		termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
+		termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
+		break;
+	case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
+	case NES_AEQE_AEID_DDP_NO_L_BIT:
+		flush_code = IB_WC_FATAL_ERR;
+		termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
+		termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
+		break;
+	case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
+	case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+		flush_code = IB_WC_GENERAL_ERR;
+		termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+		termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE;
+		break;
+	case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+		flush_code = IB_WC_LOC_LEN_ERR;
+		termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+		termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG;
+		break;
+	case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
+		flush_code = IB_WC_GENERAL_ERR;
+		if (is_tagged) {
+			termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+			termhdr->error_code = DDP_TAGGED_INV_DDP_VER;
+		} else {
+			termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+			termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER;
+		}
+		break;
+	case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
+		flush_code = IB_WC_GENERAL_ERR;
+		termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+		termhdr->error_code = DDP_UNTAGGED_INV_MO;
+		break;
+	case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+		flush_code = IB_WC_REM_OP_ERR;
+		termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+		termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF;
+		break;
+	case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
+		flush_code = IB_WC_GENERAL_ERR;
+		termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+		termhdr->error_code = DDP_UNTAGGED_INV_QN;
+		break;
+	case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
+		flush_code = IB_WC_GENERAL_ERR;
+		termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+		termhdr->error_code = RDMAP_INV_RDMAP_VER;
+		break;
+	case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
+		flush_code = IB_WC_LOC_QP_OP_ERR;
+		termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+		termhdr->error_code = RDMAP_UNEXPECTED_OP;
+		break;
+	default:
+		flush_code = IB_WC_FATAL_ERR;
+		termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+		termhdr->error_code = RDMAP_UNSPECIFIED;
+		break;
+	}
+
+	if (copy_len)
+		memcpy(termhdr + 1, pkt, copy_len);
+
+	if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) {
+		if (aeq_info & NES_AEQE_SQ)
+			nesqp->term_sq_flush_code = flush_code;
+		else
+			nesqp->term_rq_flush_code = flush_code;
+	}
+
+	return sizeof(struct nes_terminate_hdr) + copy_len;
+}
+
+static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp,
+		 struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype)
+{
+	u64 context;
+	unsigned long flags;
+	u32 aeq_info;
+	u16 async_event_id;
+	u8 tcp_state;
+	u8 iwarp_state;
+	u32 termlen = 0;
+	u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE |
+			   NES_CQP_QP_TERM_DONT_SEND_FIN;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+	if (nesqp->term_flags & NES_TERM_SENT)
+		return; /* Sanity check */
+
+	aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+	tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+	iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+	async_event_id = (u16)aeq_info;
+
+	context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
+		aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
+	if (!context) {
+		WARN_ON(!context);
+		return;
+	}
+
+	nesqp = (struct nes_qp *)(unsigned long)context;
+	spin_lock_irqsave(&nesqp->lock, flags);
+	nesqp->hw_iwarp_state = iwarp_state;
+	nesqp->hw_tcp_state = tcp_state;
+	nesqp->last_aeq = async_event_id;
+	nesqp->terminate_eventtype = eventtype;
+	spin_unlock_irqrestore(&nesqp->lock, flags);
+
+	if (nesadapter->send_term_ok)
+		termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info);
+	else
+		mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG;
+
+	nes_terminate_start_timer(nesqp);
+	nesqp->term_flags |= NES_TERM_SENT;
+	nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0);
+}
+
+static void nes_terminate_send_fin(struct nes_device *nesdev,
+			  struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
+{
+	u32 aeq_info;
+	u16 async_event_id;
+	u8 tcp_state;
+	u8 iwarp_state;
+	unsigned long flags;
+
+	aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+	tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+	iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+	async_event_id = (u16)aeq_info;
+
+	spin_lock_irqsave(&nesqp->lock, flags);
+	nesqp->hw_iwarp_state = iwarp_state;
+	nesqp->hw_tcp_state = tcp_state;
+	nesqp->last_aeq = async_event_id;
+	spin_unlock_irqrestore(&nesqp->lock, flags);
+
+	/* Send the fin only */
+	nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE |
+		NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0);
+}
+
+/* Cleanup after a terminate sent or received */
+static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred)
+{
+	u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
+	unsigned long flags;
+	struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	u8 first_time = 0;
+
+	spin_lock_irqsave(&nesqp->lock, flags);
+	if (nesqp->hte_added) {
+		nesqp->hte_added = 0;
+		next_iwarp_state |= NES_CQP_QP_DEL_HTE;
+	}
+
+	first_time = (nesqp->term_flags & NES_TERM_DONE) == 0;
+	nesqp->term_flags |= NES_TERM_DONE;
+	spin_unlock_irqrestore(&nesqp->lock, flags);
+
+	/* Make sure we go through this only once */
+	if (first_time) {
+		if (timeout_occurred == 0)
+			del_timer(&nesqp->terminate_timer);
+		else
+			next_iwarp_state |= NES_CQP_QP_RESET;
+
+		nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+		nes_cm_disconn(nesqp);
+	}
+}
+
+static void nes_terminate_received(struct nes_device *nesdev,
+				struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
+{
+	u32 aeq_info;
+	u8 *pkt;
+	u32 *mpa;
+	u8 ddp_ctl;
+	u8 rdma_ctl;
+	u16 aeq_id = 0;
+
+	aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+	if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+		/* Terminate is not a performance path so the silicon */
+		/* did not validate the frame - do it now */
+		pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+		mpa = (u32 *)locate_mpa(pkt, aeq_info);
+		ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff;
+		rdma_ctl = be32_to_cpu(mpa[0]) & 0xff;
+		if ((ddp_ctl & 0xc0) != 0x40)
+			aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC;
+		else if ((ddp_ctl & 0x03) != 1)
+			aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION;
+		else if (be32_to_cpu(mpa[2]) != 2)
+			aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN;
+		else if (be32_to_cpu(mpa[3]) != 1)
+			aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN;
+		else if (be32_to_cpu(mpa[4]) != 0)
+			aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO;
+		else if ((rdma_ctl & 0xc0) != 0x40)
+			aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+		if (aeq_id) {
+			/* Bad terminate recvd - send back a terminate */
+			aeq_info = (aeq_info & 0xffff0000) | aeq_id;
+			aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
+			nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+			return;
+		}
+	}
+
+	nesqp->term_flags |= NES_TERM_RCVD;
+	nesqp->terminate_eventtype = IB_EVENT_QP_FATAL;
+	nes_terminate_start_timer(nesqp);
+	nes_terminate_send_fin(nesdev, nesqp, aeqe);
+}
+
+/* Timeout routine in case terminate fails to complete */
+static void nes_terminate_timeout(unsigned long context)
+{
+	struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
+
+	nes_terminate_done(nesqp, 1);
+}
+
+/* Set a timer in case hw cannot complete the terminate sequence */
+static void nes_terminate_start_timer(struct nes_qp *nesqp)
+{
+	init_timer(&nesqp->terminate_timer);
+	nesqp->terminate_timer.function = nes_terminate_timeout;
+	nesqp->terminate_timer.expires = jiffies + HZ;
+	nesqp->terminate_timer.data = (unsigned long)nesqp;
+	add_timer(&nesqp->terminate_timer);
+}
+
 /**
  * nes_process_iwarp_aeqe
  */
@@ -2910,28 +3323,27 @@
 				   struct nes_hw_aeqe *aeqe)
 {
 	u64 context;
-	u64 aeqe_context = 0;
 	unsigned long flags;
 	struct nes_qp *nesqp;
+	struct nes_hw_cq *hw_cq;
+	struct nes_cq *nescq;
 	int resource_allocated;
-	/* struct iw_cm_id *cm_id; */
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
-	struct ib_event ibevent;
-	/* struct iw_cm_event cm_event; */
 	u32 aeq_info;
 	u32 next_iwarp_state = 0;
 	u16 async_event_id;
 	u8 tcp_state;
 	u8 iwarp_state;
+	int must_disconn = 1;
+	int must_terminate = 0;
+	struct ib_event ibevent;
 
 	nes_debug(NES_DBG_AEQ, "\n");
 	aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
-	if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
+	if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) {
 		context  = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
 		context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
 	} else {
-		aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
-		aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
 		context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
 						aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
 		BUG_ON(!context);
@@ -2948,7 +3360,11 @@
 
 	switch (async_event_id) {
 		case NES_AEQE_AEID_LLP_FIN_RECEIVED:
-			nesqp = *((struct nes_qp **)&context);
+			nesqp = (struct nes_qp *)(unsigned long)context;
+
+			if (nesqp->term_flags)
+				return; /* Ignore it, wait for close complete */
+
 			if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
 				nesqp->cm_id->add_ref(nesqp->cm_id);
 				schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
@@ -2959,18 +3375,24 @@
 						nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
 						async_event_id, nesqp->last_aeq, tcp_state);
 			}
+
 			if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
 					(nesqp->ibqp_state != IB_QPS_RTS)) {
 				/* FIN Received but tcp state or IB state moved on,
 						should expect a	close complete */
 				return;
 			}
+
 		case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			if (nesqp->term_flags) {
+				nes_terminate_done(nesqp, 0);
+				return;
+			}
+
 		case NES_AEQE_AEID_LLP_CONNECTION_RESET:
-		case NES_AEQE_AEID_TERMINATE_SENT:
-		case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
 		case NES_AEQE_AEID_RESET_SENT:
-			nesqp = *((struct nes_qp **)&context);
+			nesqp = (struct nes_qp *)(unsigned long)context;
 			if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
 				tcp_state = NES_AEQE_TCP_STATE_CLOSED;
 			}
@@ -2982,12 +3404,7 @@
 			if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
 					(tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
 				nesqp->hte_added = 0;
-				spin_unlock_irqrestore(&nesqp->lock, flags);
-				nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n",
-						nesqp->hwqp.qp_id);
-				nes_hw_modify_qp(nesdev, nesqp,
-						NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0);
-				spin_lock_irqsave(&nesqp->lock, flags);
+				next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
 			}
 
 			if ((nesqp->ibqp_state == IB_QPS_RTS) &&
@@ -2999,151 +3416,106 @@
 						nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
 						break;
 					case NES_AEQE_IWARP_STATE_TERMINATE:
-						next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
-						nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
-						if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
-							next_iwarp_state |= 0x02000000;
-							nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
-						}
+						must_disconn = 0; /* terminate path takes care of disconn */
+						if (nesqp->term_flags == 0)
+							must_terminate = 1;
 						break;
-					default:
-						next_iwarp_state = 0;
 				}
-				spin_unlock_irqrestore(&nesqp->lock, flags);
-				if (next_iwarp_state) {
-					nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
-							" also added another reference\n",
-							nesqp->hwqp.qp_id, next_iwarp_state);
-					nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
-				}
-				nes_cm_disconn(nesqp);
 			} else {
 				if (async_event_id ==  NES_AEQE_AEID_LLP_FIN_RECEIVED) {
 					/* FIN Received but ib state not RTS,
 							close complete will be on its way */
-					spin_unlock_irqrestore(&nesqp->lock, flags);
-					return;
+					must_disconn = 0;
 				}
-				spin_unlock_irqrestore(&nesqp->lock, flags);
-				if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
-					next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000;
-					nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
-					nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
-							" also added another reference\n",
-							nesqp->hwqp.qp_id, next_iwarp_state);
-					nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+			}
+			spin_unlock_irqrestore(&nesqp->lock, flags);
+
+			if (must_terminate)
+				nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+			else if (must_disconn) {
+				if (next_iwarp_state) {
+					nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X\n",
+						  nesqp->hwqp.qp_id, next_iwarp_state);
+					nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
 				}
 				nes_cm_disconn(nesqp);
 			}
 			break;
+
+		case NES_AEQE_AEID_TERMINATE_SENT:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			nes_terminate_send_fin(nesdev, nesqp, aeqe);
+			break;
+
 		case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
-			nesqp = *((struct nes_qp **)&context);
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = iwarp_state;
-			nesqp->hw_tcp_state = tcp_state;
-			nesqp->last_aeq = async_event_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED"
-					" event on QP%u \n  Q2 Data:\n",
-					nesqp->hwqp.qp_id);
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_FATAL;
-				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-			}
-			if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
-					((nesqp->ibqp_state == IB_QPS_RTS)&&
-					(async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
-				nes_cm_disconn(nesqp);
-			} else {
-				nesqp->in_disconnect = 0;
-				wake_up(&nesqp->kick_waitq);
-			}
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			nes_terminate_received(nesdev, nesqp, aeqe);
 			break;
-		case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
-			nesqp = *((struct nes_qp **)&context);
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
-			nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
-			nesqp->last_aeq = async_event_id;
-			if (nesqp->cm_id) {
-				nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
-						" event on QP%u, remote IP = 0x%08X \n",
-						nesqp->hwqp.qp_id,
-						ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr));
-			} else {
-				nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
-						" event on QP%u \n",
-						nesqp->hwqp.qp_id);
-			}
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET;
-			nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_FATAL;
-				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-			}
-			break;
+
+		case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
 		case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
-			if (NES_AEQE_INBOUND_RDMA&aeq_info) {
-				nesqp = nesadapter->qp_table[le32_to_cpu(
-						aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
-			} else {
-				/* TODO: get the actual WQE and mask off wqe index */
-				context &= ~((u64)511);
-				nesqp = *((struct nes_qp **)&context);
-			}
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = iwarp_state;
-			nesqp->hw_tcp_state = tcp_state;
-			nesqp->last_aeq = async_event_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n",
-					nesqp->hwqp.qp_id);
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-			}
-			break;
 		case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
-			nesqp = *((struct nes_qp **)&context);
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = iwarp_state;
-			nesqp->hw_tcp_state = tcp_state;
-			nesqp->last_aeq = async_event_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n",
-					nesqp->hwqp.qp_id);
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-			}
-			break;
+		case NES_AEQE_AEID_AMP_INVALID_STAG:
+		case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
+		case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
 		case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
-			nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words
-					[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = iwarp_state;
-			nesqp->hw_tcp_state = tcp_state;
-			nesqp->last_aeq = async_event_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u,"
-					" nesqp = %p, AE reported %p\n",
-					nesqp->hwqp.qp_id, nesqp, *((struct nes_qp **)&context));
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-			}
+		case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+		case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+		case NES_AEQE_AEID_AMP_TO_WRAP:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
 			break;
+
+		case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
+		case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
+		case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
+		case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
+				aeq_info &= 0xffff0000;
+				aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
+				aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
+			}
+
+		case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
+		case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
+		case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+		case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+		case NES_AEQE_AEID_AMP_BAD_QP:
+		case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+		case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
+		case NES_AEQE_AEID_DDP_NO_L_BIT:
+		case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
+		case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+		case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
+		case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
+		case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
+		case NES_AEQE_AEID_AMP_BAD_PD:
+		case NES_AEQE_AEID_AMP_FASTREG_SHARED:
+		case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG:
+		case NES_AEQE_AEID_AMP_FASTREG_MW_STAG:
+		case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS:
+		case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW:
+		case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH:
+		case NES_AEQE_AEID_AMP_INVALIDATE_SHARED:
+		case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS:
+		case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG:
+		case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG:
+		case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG:
+		case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG:
+		case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS:
+		case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS:
+		case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT:
+		case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED:
+		case NES_AEQE_AEID_BAD_CLOSE:
+		case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO:
+		case NES_AEQE_AEID_STAG_ZERO_INVALID:
+		case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
+		case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+			nesqp = (struct nes_qp *)(unsigned long)context;
+			nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+			break;
+
 		case NES_AEQE_AEID_CQ_OPERATION_ERROR:
 			context <<= 1;
 			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
@@ -3153,83 +3525,19 @@
 			if (resource_allocated) {
 				printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
 						__func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+				hw_cq = (struct nes_hw_cq *)(unsigned long)context;
+				if (hw_cq) {
+					nescq = container_of(hw_cq, struct nes_cq, hw_cq);
+					if (nescq->ibcq.event_handler) {
+						ibevent.device = nescq->ibcq.device;
+						ibevent.event = IB_EVENT_CQ_ERR;
+						ibevent.element.cq = &nescq->ibcq;
+						nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context);
+					}
+				}
 			}
 			break;
-		case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
-			nesqp = nesadapter->qp_table[le32_to_cpu(
-					aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = iwarp_state;
-			nesqp->hw_tcp_state = tcp_state;
-			nesqp->last_aeq = async_event_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG"
-					"_FOR_AVAILABLE_BUFFER event on QP%u\n",
-					nesqp->hwqp.qp_id);
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-			}
-			/* tell cm to disconnect, cm will queue work to thread */
-			nes_cm_disconn(nesqp);
-			break;
-		case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
-			nesqp = *((struct nes_qp **)&context);
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = iwarp_state;
-			nesqp->hw_tcp_state = tcp_state;
-			nesqp->last_aeq = async_event_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN"
-					"_NO_BUFFER_AVAILABLE event on QP%u\n",
-					nesqp->hwqp.qp_id);
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_FATAL;
-				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-			}
-			/* tell cm to disconnect, cm will queue work to thread */
-			nes_cm_disconn(nesqp);
-			break;
-		case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
-			nesqp = *((struct nes_qp **)&context);
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = iwarp_state;
-			nesqp->hw_tcp_state = tcp_state;
-			nesqp->last_aeq = async_event_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR"
-					" event on QP%u \n  Q2 Data:\n",
-					nesqp->hwqp.qp_id);
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_FATAL;
-				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-			}
-			/* tell cm to disconnect, cm will queue work to thread */
-			nes_cm_disconn(nesqp);
-			break;
-			/* TODO: additional AEs need to be here */
-		case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
-			nesqp = *((struct nes_qp **)&context);
-			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = iwarp_state;
-			nesqp->hw_tcp_state = tcp_state;
-			nesqp->last_aeq = async_event_id;
-			spin_unlock_irqrestore(&nesqp->lock, flags);
-			if (nesqp->ibqp.event_handler) {
-				ibevent.device = nesqp->ibqp.device;
-				ibevent.element.qp = &nesqp->ibqp;
-				ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-				nesqp->ibqp.event_handler(&ibevent,
-						nesqp->ibqp.qp_context);
-			}
-			nes_cm_disconn(nesqp);
-			break;
+
 		default:
 			nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
 					async_event_id);
@@ -3238,7 +3546,6 @@
 
 }
 
-
 /**
  * nes_iwarp_ce_handler
  */
@@ -3373,6 +3680,8 @@
 {
 	struct nes_cqp_request *cqp_request;
 	struct nes_hw_cqp_wqe *cqp_wqe;
+	u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
+	u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
 	int ret;
 
 	cqp_request = nes_get_cqp_request(nesdev);
@@ -3389,6 +3698,24 @@
 	cqp_wqe = &cqp_request->cqp_wqe;
 	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
 
+	/* If wqe in error was identified, set code to be put into cqe */
+	if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) {
+		which_wq |= NES_CQP_FLUSH_MAJ_MIN;
+		sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code;
+		nesqp->term_sq_flush_code = 0;
+	}
+
+	if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) {
+		which_wq |= NES_CQP_FLUSH_MAJ_MIN;
+		rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code;
+		nesqp->term_rq_flush_code = 0;
+	}
+
+	if (which_wq & NES_CQP_FLUSH_MAJ_MIN) {
+		cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code);
+		cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code);
+	}
+
 	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
 			cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
 	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index c3654c6..f28a41b 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -241,6 +241,7 @@
 };
 
 #define NES_CQP_OP_IWARP_STATE_SHIFT 28
+#define NES_CQP_OP_TERMLEN_SHIFT     28
 
 enum nes_cqp_qp_bits {
 	NES_CQP_QP_ARP_VALID = (1<<8),
@@ -265,12 +266,16 @@
 	NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
 	NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
 	NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),
+	NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24),
+	NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25),
 	NES_CQP_QP_RESET = (1<<31),
 };
 
 enum nes_cqp_qp_wqe_word_idx {
 	NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
 	NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
+	NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8,
+	NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9,
 	NES_CQP_QP_WQE_NEW_MSS_IDX = 15,
 };
 
@@ -361,6 +366,7 @@
 enum nes_cqp_flush_bits {
 	NES_CQP_FLUSH_SQ = (1<<30),
 	NES_CQP_FLUSH_RQ = (1<<31),
+	NES_CQP_FLUSH_MAJ_MIN = (1<<28),
 };
 
 enum nes_cqe_opcode_bits {
@@ -633,11 +639,14 @@
 	NES_AEQE_INBOUND_RDMA = (1<<19),
 	NES_AEQE_IWARP_STATE_MASK = (7<<20),
 	NES_AEQE_TCP_STATE_MASK = (0xf<<24),
+	NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28),
 	NES_AEQE_VALID = (1<<31),
 };
 
 #define NES_AEQE_IWARP_STATE_SHIFT	20
 #define NES_AEQE_TCP_STATE_SHIFT	24
+#define NES_AEQE_Q2_DATA_ETHERNET       (1<<28)
+#define NES_AEQE_Q2_DATA_MPA            (1<<29)
 
 enum nes_aeqe_iwarp_state {
 	NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
@@ -751,6 +760,15 @@
 	NES_IWARP_SQ_OP_NOP = 12,
 };
 
+enum nes_iwarp_cqe_major_code {
+	NES_IWARP_CQE_MAJOR_FLUSH = 1,
+	NES_IWARP_CQE_MAJOR_DRV = 0x8000
+};
+
+enum nes_iwarp_cqe_minor_code {
+	NES_IWARP_CQE_MINOR_FLUSH = 1
+};
+
 #define NES_EEPROM_READ_REQUEST (1<<16)
 #define NES_MAC_ADDR_VALID      (1<<20)
 
@@ -1119,6 +1137,7 @@
 	u8            netdev_max;	/* from host nic address count in EEPROM */
 	u8            port_count;
 	u8            virtwq;
+	u8            send_term_ok;
 	u8            et_use_adaptive_rx_coalesce;
 	u8            adapter_fcn_count;
 	u8 pft_mcast_map[NES_PFT_SIZE];
@@ -1217,6 +1236,90 @@
 	u32 num_pd;
 };
 
+enum nes_hdrct_flags {
+	DDP_LEN_FLAG                    = 0x80,
+	DDP_HDR_FLAG                    = 0x40,
+	RDMA_HDR_FLAG                   = 0x20
+};
+
+enum nes_term_layers {
+	LAYER_RDMA			= 0,
+	LAYER_DDP			= 1,
+	LAYER_MPA			= 2
+};
+
+enum nes_term_error_types {
+	RDMAP_CATASTROPHIC		= 0,
+	RDMAP_REMOTE_PROT		= 1,
+	RDMAP_REMOTE_OP			= 2,
+	DDP_CATASTROPHIC		= 0,
+	DDP_TAGGED_BUFFER		= 1,
+	DDP_UNTAGGED_BUFFER		= 2,
+	DDP_LLP				= 3
+};
+
+enum nes_term_rdma_errors {
+	RDMAP_INV_STAG			= 0x00,
+	RDMAP_INV_BOUNDS		= 0x01,
+	RDMAP_ACCESS			= 0x02,
+	RDMAP_UNASSOC_STAG		= 0x03,
+	RDMAP_TO_WRAP			= 0x04,
+	RDMAP_INV_RDMAP_VER		= 0x05,
+	RDMAP_UNEXPECTED_OP		= 0x06,
+	RDMAP_CATASTROPHIC_LOCAL	= 0x07,
+	RDMAP_CATASTROPHIC_GLOBAL	= 0x08,
+	RDMAP_CANT_INV_STAG		= 0x09,
+	RDMAP_UNSPECIFIED		= 0xff
+};
+
+enum nes_term_ddp_errors {
+	DDP_CATASTROPHIC_LOCAL		= 0x00,
+	DDP_TAGGED_INV_STAG		= 0x00,
+	DDP_TAGGED_BOUNDS		= 0x01,
+	DDP_TAGGED_UNASSOC_STAG		= 0x02,
+	DDP_TAGGED_TO_WRAP		= 0x03,
+	DDP_TAGGED_INV_DDP_VER		= 0x04,
+	DDP_UNTAGGED_INV_QN		= 0x01,
+	DDP_UNTAGGED_INV_MSN_NO_BUF	= 0x02,
+	DDP_UNTAGGED_INV_MSN_RANGE	= 0x03,
+	DDP_UNTAGGED_INV_MO		= 0x04,
+	DDP_UNTAGGED_INV_TOO_LONG	= 0x05,
+	DDP_UNTAGGED_INV_DDP_VER	= 0x06
+};
+
+enum nes_term_mpa_errors {
+	MPA_CLOSED			= 0x01,
+	MPA_CRC				= 0x02,
+	MPA_MARKER			= 0x03,
+	MPA_REQ_RSP			= 0x04,
+};
+
+struct nes_terminate_hdr {
+	u8 layer_etype;
+	u8 error_code;
+	u8 hdrct;
+	u8 rsvd;
+};
+
+/* Used to determine how to fill in terminate error codes */
+#define IWARP_OPCODE_WRITE		0
+#define IWARP_OPCODE_READREQ		1
+#define IWARP_OPCODE_READRSP		2
+#define IWARP_OPCODE_SEND		3
+#define IWARP_OPCODE_SEND_INV		4
+#define IWARP_OPCODE_SEND_SE		5
+#define IWARP_OPCODE_SEND_SE_INV	6
+#define IWARP_OPCODE_TERM		7
+
+/* These values are used only during terminate processing */
+#define TERM_DDP_LEN_TAGGED	14
+#define TERM_DDP_LEN_UNTAGGED	18
+#define TERM_RDMA_LEN		28
+#define RDMA_OPCODE_MASK	0x0f
+#define RDMA_READ_REQ_OPCODE	1
+#define BAD_FRAME_OFFSET	64
+#define CQE_MAJOR_DRV		0x8000
+
 #define nes_vlan_rx vlan_hwaccel_receive_skb
 #define nes_netif_rx netif_receive_skb
 
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index a282031..9687c39 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -183,6 +183,9 @@
 		} else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {
 			nesadapter->virtwq = 1;
 		}
+		if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
+			nesadapter->send_term_ok = 1;
+
 		nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8))  <<  16) +
 				(u32)((u8)eeprom_data);
 
@@ -548,7 +551,7 @@
 		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
 	}
 	if (cqp_request == NULL) {
-		cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL);
+		cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC);
 		if (cqp_request) {
 			cqp_request->dynamic = 1;
 			INIT_LIST_HEAD(&cqp_request->list);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 21e0fd3..a680c42 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -667,15 +667,32 @@
  */
 static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
 {
+	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+	struct net_device *netdev = nesvnic->netdev;
+
 	memset(props, 0, sizeof(*props));
 
-	props->max_mtu = IB_MTU_2048;
-	props->active_mtu = IB_MTU_2048;
+	props->max_mtu = IB_MTU_4096;
+
+	if (netdev->mtu  >= 4096)
+		props->active_mtu = IB_MTU_4096;
+	else if (netdev->mtu  >= 2048)
+		props->active_mtu = IB_MTU_2048;
+	else if (netdev->mtu  >= 1024)
+		props->active_mtu = IB_MTU_1024;
+	else if (netdev->mtu  >= 512)
+		props->active_mtu = IB_MTU_512;
+	else
+		props->active_mtu = IB_MTU_256;
+
 	props->lid = 1;
 	props->lmc = 0;
 	props->sm_lid = 0;
 	props->sm_sl = 0;
-	props->state = IB_PORT_ACTIVE;
+	if (nesvnic->linkup)
+		props->state = IB_PORT_ACTIVE;
+	else
+		props->state = IB_PORT_DOWN;
 	props->phys_state = 0;
 	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
 			IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
@@ -1506,12 +1523,45 @@
 
 
 /**
+ * nes_clean_cq
+ */
+static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq)
+{
+	u32 cq_head;
+	u32 lo;
+	u32 hi;
+	u64 u64temp;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&nescq->lock, flags);
+
+	cq_head = nescq->hw_cq.cq_head;
+	while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
+		rmb();
+		lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+		hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]);
+		u64temp = (((u64)hi) << 32) | ((u64)lo);
+		u64temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+		if (u64temp == (u64)(unsigned long)nesqp) {
+			/* Zero the context value so cqe will be ignored */
+			nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0;
+			nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0;
+		}
+
+		if (++cq_head >= nescq->hw_cq.cq_size)
+			cq_head = 0;
+	}
+
+	spin_unlock_irqrestore(&nescq->lock, flags);
+}
+
+
+/**
  * nes_destroy_qp
  */
 static int nes_destroy_qp(struct ib_qp *ibqp)
 {
 	struct nes_qp *nesqp = to_nesqp(ibqp);
-	/* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */
 	struct nes_ucontext *nes_ucontext;
 	struct ib_qp_attr attr;
 	struct iw_cm_id *cm_id;
@@ -1548,7 +1598,6 @@
 			nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
 	}
 
-
 	if (nesqp->user_mode) {
 		if ((ibqp->uobject)&&(ibqp->uobject->context)) {
 			nes_ucontext = to_nesucontext(ibqp->uobject->context);
@@ -1560,6 +1609,13 @@
 		}
 		if (nesqp->pbl_pbase)
 			kunmap(nesqp->page);
+	} else {
+		/* Clean any pending completions from the cq(s) */
+		if (nesqp->nesscq)
+			nes_clean_cq(nesqp, nesqp->nesscq);
+
+		if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
+			nes_clean_cq(nesqp, nesqp->nesrcq);
 	}
 
 	nes_rem_ref(&nesqp->ibqp);
@@ -2884,7 +2940,7 @@
  * nes_hw_modify_qp
  */
 int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
-		u32 next_iwarp_state, u32 wait_completion)
+		u32 next_iwarp_state, u32 termlen, u32 wait_completion)
 {
 	struct nes_hw_cqp_wqe *cqp_wqe;
 	/* struct iw_cm_id *cm_id = nesqp->cm_id; */
@@ -2916,6 +2972,13 @@
 	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
 	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
 
+	/* If sending a terminate message, fill in the length (in words) */
+	if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) &&
+	    !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) {
+		termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT;
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen);
+	}
+
 	atomic_set(&cqp_request->refcount, 2);
 	nes_post_cqp_request(nesdev, cqp_request);
 
@@ -3086,6 +3149,9 @@
 				}
 				nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
 						nesqp->hwqp.qp_id);
+				if (nesqp->term_flags)
+					del_timer(&nesqp->terminate_timer);
+
 				next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
 				/* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
 					if (nesqp->hte_added) {
@@ -3163,7 +3229,7 @@
 
 	if (issue_modify_qp) {
 		nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
-		ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1);
+		ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1);
 		if (ret)
 			nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
 					" failed for QP%u.\n",
@@ -3328,6 +3394,12 @@
 	head = nesqp->hwqp.sq_head;
 
 	while (ib_wr) {
+		/* Check for QP error */
+		if (nesqp->term_flags) {
+			err = -EINVAL;
+			break;
+		}
+
 		/* Check for SQ overflow */
 		if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
 			err = -EINVAL;
@@ -3484,6 +3556,12 @@
 	head = nesqp->hwqp.rq_head;
 
 	while (ib_wr) {
+		/* Check for QP error */
+		if (nesqp->term_flags) {
+			err = -EINVAL;
+			break;
+		}
+
 		if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
 			err = -EINVAL;
 			break;
@@ -3547,7 +3625,6 @@
 {
 	u64 u64temp;
 	u64 wrid;
-	/* u64 u64temp; */
 	unsigned long flags = 0;
 	struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
 	struct nes_device *nesdev = nesvnic->nesdev;
@@ -3555,12 +3632,13 @@
 	struct nes_qp *nesqp;
 	struct nes_hw_cqe cqe;
 	u32 head;
-	u32 wq_tail;
+	u32 wq_tail = 0;
 	u32 cq_size;
 	u32 cqe_count = 0;
 	u32 wqe_index;
 	u32 u32temp;
-	/* u32 counter; */
+	u32 move_cq_head = 1;
+	u32 err_code;
 
 	nes_debug(NES_DBG_CQ, "\n");
 
@@ -3570,29 +3648,40 @@
 	cq_size = nescq->hw_cq.cq_size;
 
 	while (cqe_count < num_entries) {
-		if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
-				NES_CQE_VALID) {
-			/*
-			 * Make sure we read CQ entry contents *after*
-			 * we've checked the valid bit.
-			 */
-			rmb();
+		if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
+				NES_CQE_VALID) == 0)
+			break;
 
-			cqe = nescq->hw_cq.cq_vbase[head];
-			nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
-			u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
-			wqe_index = u32temp &
-					(nesdev->nesadapter->max_qp_wr - 1);
-			u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
-			/* parse CQE, get completion context from WQE (either rq or sq */
-			u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
-					((u64)u32temp);
-			nesqp = *((struct nes_qp **)&u64temp);
+		/*
+		 * Make sure we read CQ entry contents *after*
+		 * we've checked the valid bit.
+		 */
+		rmb();
+
+		cqe = nescq->hw_cq.cq_vbase[head];
+		u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+		wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1);
+		u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+		/* parse CQE, get completion context from WQE (either rq or sq) */
+		u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+				((u64)u32temp);
+
+		if (u64temp) {
+			nesqp = (struct nes_qp *)(unsigned long)u64temp;
 			memset(entry, 0, sizeof *entry);
 			if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
 				entry->status = IB_WC_SUCCESS;
 			} else {
-				entry->status = IB_WC_WR_FLUSH_ERR;
+				err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]);
+				if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) {
+					entry->status = err_code & 0x0000ffff;
+
+					/* The rest of the cqe's will be marked as flushed */
+					nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] =
+						cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) |
+							    NES_IWARP_CQE_MINOR_FLUSH);
+				} else
+					entry->status = IB_WC_WR_FLUSH_ERR;
 			}
 
 			entry->qp = &nesqp->ibqp;
@@ -3601,20 +3690,18 @@
 			if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
 				if (nesqp->skip_lsmm) {
 					nesqp->skip_lsmm = 0;
-					wq_tail = nesqp->hwqp.sq_tail++;
+					nesqp->hwqp.sq_tail++;
 				}
 
 				/* Working on a SQ Completion*/
-				wq_tail = wqe_index;
-				nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
-				wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
+				wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
 						wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
-						((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
+						((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
 						wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
-				entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+				entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
 						wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
 
-				switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+				switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
 						wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
 					case NES_IWARP_SQ_OP_RDMAW:
 						nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
@@ -3623,7 +3710,7 @@
 					case NES_IWARP_SQ_OP_RDMAR:
 						nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
 						entry->opcode = IB_WC_RDMA_READ;
-						entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+						entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
 								wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
 						break;
 					case NES_IWARP_SQ_OP_SENDINV:
@@ -3634,33 +3721,54 @@
 						entry->opcode = IB_WC_SEND;
 						break;
 				}
+
+				nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
+				if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) {
+					move_cq_head = 0;
+					wq_tail = nesqp->hwqp.sq_tail;
+				}
 			} else {
 				/* Working on a RQ Completion*/
-				wq_tail = wqe_index;
-					nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
 				entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
-				wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
-					((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
+				wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
+					((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
 					entry->opcode = IB_WC_RECV;
-			}
-			entry->wr_id = wrid;
 
+				nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
+				if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) {
+					move_cq_head = 0;
+					wq_tail = nesqp->hwqp.rq_tail;
+				}
+			}
+
+			entry->wr_id = wrid;
+			entry++;
+			cqe_count++;
+		}
+
+		if (move_cq_head) {
+			nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
 			if (++head >= cq_size)
 				head = 0;
-			cqe_count++;
 			nescq->polled_completions++;
+
 			if ((nescq->polled_completions > (cq_size / 2)) ||
 					(nescq->polled_completions == 255)) {
 				nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
-						" are pending %u of %u.\n",
-						nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
+					" are pending %u of %u.\n",
+					nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
 				nes_write32(nesdev->regs+NES_CQE_ALLOC,
-						nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
+					nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
 				nescq->polled_completions = 0;
 			}
-			entry++;
-		} else
-			break;
+		} else {
+			/* Update the wqe index and set status to flush */
+			wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+			wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail;
+			nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] =
+				cpu_to_le32(wqe_index);
+			move_cq_head = 1; /* ready for next pass */
+		}
 	}
 
 	if (nescq->polled_completions) {
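
Note: the reworked poll loop above introduces a two-state head advance; a summary of the control flow it implements (description only, not code from the patch):

	/*
	 * - On a driver-reported error (major code NES_IWARP_CQE_MAJOR_DRV),
	 *   the low 16 bits of the error code become the WC status, and the
	 *   CQE in the ring is overwritten with MAJOR_FLUSH/MINOR_FLUSH.
	 * - If the affected work queue still holds outstanding WQEs
	 *   (tail != head), move_cq_head is cleared: the CQ head is not
	 *   advanced, and the CQE's context-low word is rewritten to the new
	 *   wq_tail, so the next iteration reports the remaining WQEs as
	 *   flushed rather than dropping them.
	 */
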
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 41c07f2..89822d7 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -40,6 +40,10 @@
 #define NES_MAX_USER_DB_REGIONS  4096
 #define NES_MAX_USER_WQ_REGIONS  4096
 
+#define NES_TERM_SENT            0x01
+#define NES_TERM_RCVD            0x02
+#define NES_TERM_DONE            0x04
+
 struct nes_ucontext {
 	struct ib_ucontext ibucontext;
 	struct nes_device  *nesdev;
@@ -119,6 +123,11 @@
 	spinlock_t lock;
 };
 
+struct disconn_work {
+	struct work_struct    work;
+	struct nes_qp         *nesqp;
+};
+
 struct iw_cm_id;
 struct ietf_mpa_frame;
 
@@ -127,7 +136,6 @@
 	void                  *allocated_buffer;
 	struct iw_cm_id       *cm_id;
 	struct workqueue_struct *wq;
-	struct work_struct    disconn_work;
 	struct nes_cq         *nesscq;
 	struct nes_cq         *nesrcq;
 	struct nes_pd         *nespd;
@@ -155,9 +163,13 @@
 	void	              *pbl_vbase;
 	dma_addr_t            pbl_pbase;
 	struct page           *page;
+	struct timer_list     terminate_timer;
+	enum ib_event_type    terminate_eventtype;
 	wait_queue_head_t     kick_waitq;
 	u16                   in_disconnect;
 	u16                   private_data_len;
+	u16                   term_sq_flush_code;
+	u16                   term_rq_flush_code;
 	u8                    active_conn;
 	u8                    skip_lsmm;
 	u8                    user_mode;
@@ -165,7 +177,7 @@
 	u8                    hw_iwarp_state;
 	u8                    flush_issued;
 	u8                    hw_tcp_state;
-	u8                    disconn_pending;
+	u8                    term_flags;
 	u8                    destroyed;
 };
 #endif			/* NES_VERBS_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 181b1f3..8f4b4fc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -31,7 +31,6 @@
  */
 
 #include <rdma/ib_cm.h>
-#include <rdma/ib_cache.h>
 #include <net/dst.h>
 #include <net/icmp.h>
 #include <linux/icmpv6.h>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index e7e5adf..e35f4a0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -36,7 +36,6 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 
-#include <rdma/ib_cache.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e319d91..2bf5116 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -604,8 +604,11 @@
 					   skb_queue_len(&neigh->queue));
 				goto err_drop;
 			}
-		} else
+		} else {
+			spin_unlock_irqrestore(&priv->lock, flags);
 			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha));
+			return;
+		}
 	} else {
 		neigh->ah  = NULL;
 
@@ -688,7 +691,9 @@
 		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
 			  be16_to_cpu(path->pathrec.dlid));
 
+		spin_unlock_irqrestore(&priv->lock, flags);
 		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+		return;
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 		/* put pseudoheader back on for next time */
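
Note on the hunks above: priv->lock is now released before calling ipoib_send(), and the path returns immediately afterwards; falling through would reach the shared spin_unlock_irqrestore() later in the function and unlock twice. The pattern, in simplified form (comment sketch, not the driver's code):

	/*
	 *	spin_lock_irqsave(&priv->lock, flags);
	 *	if (ready_to_send) {
	 *		spin_unlock_irqrestore(&priv->lock, flags);
	 *		ipoib_send(...);	// runs without priv->lock held
	 *		return;			// skip the shared unlock below
	 *	}
	 *	...
	 *	spin_unlock_irqrestore(&priv->lock, flags);
	 */
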
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index a0e9753..25874fc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -720,7 +720,9 @@
 			}
 		}
 
+		spin_unlock_irqrestore(&priv->lock, flags);
 		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+		return;
 	}
 
 unlock:
@@ -758,6 +760,20 @@
 	}
 }
 
+static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
+				     const u8 *broadcast)
+{
+	if (addrlen != INFINIBAND_ALEN)
+		return 0;
+	/* reserved QPN, prefix, scope */
+	if (memcmp(addr, broadcast, 6))
+		return 0;
+	/* signature lower, pkey */
+	if (memcmp(addr + 7, broadcast + 7, 3))
+		return 0;
+	return 1;
+}
+
 void ipoib_mcast_restart_task(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv =
@@ -791,6 +807,11 @@
 	for (mclist = dev->mc_list; mclist; mclist = mclist->next) {
 		union ib_gid mgid;
 
+		if (!ipoib_mcast_addr_is_valid(mclist->dmi_addr,
+					       mclist->dmi_addrlen,
+					       dev->broadcast))
+			continue;
+
 		memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);
 
 		mcast = __ipoib_mcast_find(dev, &mgid);
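
Note: ipoib_mcast_addr_is_valid() above checks a 20-byte IPoIB hardware address against the device broadcast address before an MGID is derived from it. For reference, the multicast address layout this assumes (per RFC 4391; offsets only, not a struct from the patch):

	/*
	 * addr[0..3]   reserved QPN
	 * addr[4]      0xff (multicast GID prefix)
	 * addr[5]      flags | scope
	 * addr[6..7]   IPoIB signature
	 * addr[8..9]   P_Key
	 * addr[10..19] multicast group ID
	 *
	 * Bytes 0-5 and 7-9 must match the broadcast address; byte 6 is
	 * deliberately excluded from the comparison.
	 */
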
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 6e186b1..652bd33 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -582,7 +582,7 @@
 		break;
 	case STATUSTYPE_TABLE:
 		sz = 0;
-		table_args = strstr(lc->usr_argv_str, " ");
+		table_args = strchr(lc->usr_argv_str, ' ');
 		BUG_ON(!table_args); /* There will always be a ' ' */
 		table_args++;
 
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index fb5df5c..c97ab82 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -1286,6 +1286,7 @@
 	if (!other_ports)
 		schedule_chk_task(adapter);
 
+	cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_UP, pi->port_id);
 	return 0;
 }
 
@@ -1318,6 +1319,7 @@
 	if (!adapter->open_device_map)
 		cxgb_down(adapter);
 
+	cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_DOWN, pi->port_id);
 	return 0;
 }
 
@@ -2717,7 +2719,7 @@
 
 	if (is_offload(adapter) &&
 	    test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) {
-		cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
+		cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
 		offload_close(&adapter->tdev);
 	}
 
@@ -2782,7 +2784,7 @@
 	}
 
 	if (is_offload(adapter) && !ofld_disable)
-		cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
+		cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
 }
 
 /*
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index f9f54b5..75064ee 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -153,14 +153,14 @@
 	mutex_unlock(&cxgb3_db_lock);
 }
 
-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error)
+void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port)
 {
 	struct cxgb3_client *client;
 
 	mutex_lock(&cxgb3_db_lock);
 	list_for_each_entry(client, &client_list, client_list) {
-		if (client->err_handler)
-			client->err_handler(tdev, status, error);
+		if (client->event_handler)
+			client->event_handler(tdev, event, port);
 	}
 	mutex_unlock(&cxgb3_db_lock);
 }
diff --git a/drivers/net/cxgb3/cxgb3_offload.h b/drivers/net/cxgb3/cxgb3_offload.h
index 55945f4..670aa62 100644
--- a/drivers/net/cxgb3/cxgb3_offload.h
+++ b/drivers/net/cxgb3/cxgb3_offload.h
@@ -64,14 +64,16 @@
 void cxgb3_unregister_client(struct cxgb3_client *client);
 void cxgb3_add_clients(struct t3cdev *tdev);
 void cxgb3_remove_clients(struct t3cdev *tdev);
-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error);
+void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port);
 
 typedef int (*cxgb3_cpl_handler_func)(struct t3cdev *dev,
 				      struct sk_buff *skb, void *ctx);
 
 enum {
 	OFFLOAD_STATUS_UP,
-	OFFLOAD_STATUS_DOWN
+	OFFLOAD_STATUS_DOWN,
+	OFFLOAD_PORT_DOWN,
+	OFFLOAD_PORT_UP
 };
 
 struct cxgb3_client {
@@ -82,7 +84,7 @@
 	int (*redirect)(void *ctx, struct dst_entry *old,
 			struct dst_entry *new, struct l2t_entry *l2t);
 	struct list_head client_list;
-	void (*err_handler)(struct t3cdev *tdev, u32 status, u32 error);
+	void (*event_handler)(struct t3cdev *tdev, u32 event, u32 port);
 };
 
 /*
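
Note: err_handler becomes the more general event_handler, and per-port OFFLOAD_PORT_UP/DOWN events join the existing adapter-wide status events. A minimal sketch of a client using the new callback (hypothetical names; assumes registration via cxgb3_register_client(), which this header also declares):

	static void my_event_handler(struct t3cdev *tdev, u32 event, u32 port)
	{
		switch (event) {
		case OFFLOAD_PORT_UP:		/* port came back: resume offload on it */
			break;
		case OFFLOAD_PORT_DOWN:		/* port went down: quiesce its connections */
			break;
		case OFFLOAD_STATUS_UP:		/* adapter-wide offload available */
		case OFFLOAD_STATUS_DOWN:	/* adapter-wide offload gone */
			break;
		}
	}

	static struct cxgb3_client my_client = {
		.event_handler	= my_event_handler,
	};
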
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index ac57b6a..ccfe276 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -34,7 +34,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/hardirq.h>
 
 #include <linux/mlx4/cmd.h>
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index b9ceddd..bffb799 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
@@ -42,6 +41,10 @@
 #include "fw.h"
 
 enum {
+	MLX4_IRQNAME_SIZE	= 64
+};
+
+enum {
 	MLX4_NUM_ASYNC_EQE	= 0x100,
 	MLX4_NUM_SPARE_EQE	= 0x80,
 	MLX4_EQ_ENTRY_SIZE	= 0x20
@@ -526,48 +529,6 @@
 	iounmap(priv->clr_base);
 }
 
-int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
-{
-	struct mlx4_priv *priv = mlx4_priv(dev);
-	int ret;
-
-	/*
-	 * We assume that mapping one page is enough for the whole EQ
-	 * context table.  This is fine with all current HCAs, because
-	 * we only use 32 EQs and each EQ uses 64 bytes of context
-	 * memory, or 1 KB total.
-	 */
-	priv->eq_table.icm_virt = icm_virt;
-	priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
-	if (!priv->eq_table.icm_page)
-		return -ENOMEM;
-	priv->eq_table.icm_dma  = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
-					       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) {
-		__free_page(priv->eq_table.icm_page);
-		return -ENOMEM;
-	}
-
-	ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt);
-	if (ret) {
-		pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
-			       PCI_DMA_BIDIRECTIONAL);
-		__free_page(priv->eq_table.icm_page);
-	}
-
-	return ret;
-}
-
-void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
-{
-	struct mlx4_priv *priv = mlx4_priv(dev);
-
-	mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1);
-	pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
-		       PCI_DMA_BIDIRECTIONAL);
-	__free_page(priv->eq_table.icm_page);
-}
-
 int mlx4_alloc_eq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -615,7 +576,9 @@
 	priv->eq_table.clr_int  = priv->clr_base +
 		(priv->eq_table.inta_pin < 32 ? 4 : 0);
 
-	priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+	priv->eq_table.irq_names =
+		kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1),
+			GFP_KERNEL);
 	if (!priv->eq_table.irq_names) {
 		err = -ENOMEM;
 		goto err_out_bitmap;
@@ -638,17 +601,25 @@
 		goto err_out_comp;
 
 	if (dev->flags & MLX4_FLAG_MSI_X) {
-		static const char async_eq_name[] = "mlx4-async";
 		const char *eq_name;
 
 		for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
 			if (i < dev->caps.num_comp_vectors) {
-				snprintf(priv->eq_table.irq_names + i * 16, 16,
-					 "mlx4-comp-%d", i);
-				eq_name = priv->eq_table.irq_names + i * 16;
-			} else
-				eq_name = async_eq_name;
+				snprintf(priv->eq_table.irq_names +
+					 i * MLX4_IRQNAME_SIZE,
+					 MLX4_IRQNAME_SIZE,
+					 "mlx4-comp-%d@pci:%s", i,
+					 pci_name(dev->pdev));
+			} else {
+				snprintf(priv->eq_table.irq_names +
+					 i * MLX4_IRQNAME_SIZE,
+					 MLX4_IRQNAME_SIZE,
+					 "mlx4-async@pci:%s",
+					 pci_name(dev->pdev));
+			}
 
+			eq_name = priv->eq_table.irq_names +
+				  i * MLX4_IRQNAME_SIZE;
 			err = request_irq(priv->eq_table.eq[i].irq,
 					  mlx4_msi_x_interrupt, 0, eq_name,
 					  priv->eq_table.eq + i);
@@ -658,8 +629,12 @@
 			priv->eq_table.eq[i].have_irq = 1;
 		}
 	} else {
+		snprintf(priv->eq_table.irq_names,
+			 MLX4_IRQNAME_SIZE,
+			 DRV_NAME "@pci:%s",
+			 pci_name(dev->pdev));
 		err = request_irq(dev->pdev->irq, mlx4_interrupt,
-				  IRQF_SHARED, DRV_NAME, dev);
+				  IRQF_SHARED, priv->eq_table.irq_names, dev);
 		if (err)
 			goto err_out_async;
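
Note: request_irq() stores the name pointer rather than copying the string, so each vector's name must live in storage that outlives the IRQ. The rework gives every vector its own fixed-size slot in the persistent irq_names buffer and embeds the PCI device name, letting /proc/interrupts distinguish multiple adapters:

	/* slot i of the persistent name buffer, MLX4_IRQNAME_SIZE bytes wide */
	eq_name = priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE;
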
 
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c
index baf4bf6..04b382f 100644
--- a/drivers/net/mlx4/icm.c
+++ b/drivers/net/mlx4/icm.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index dac621b..3dd481e 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -525,7 +525,10 @@
 		goto err_unmap_aux;
 	}
 
-	err = mlx4_map_eq_icm(dev, init_hca->eqc_base);
+	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
+				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
+				  dev->caps.num_eqs, dev->caps.num_eqs,
+				  0, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
 		goto err_unmap_cmpt;
@@ -668,7 +671,7 @@
 	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
 
 err_unmap_eq:
-	mlx4_unmap_eq_icm(dev);
+	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
 
 err_unmap_cmpt:
 	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
@@ -698,11 +701,11 @@
 	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
 	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
 	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
+	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
 	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
 	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
 	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
 	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
-	mlx4_unmap_eq_icm(dev);
 
 	mlx4_UNMAP_ICM_AUX(dev);
 	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
@@ -786,7 +789,7 @@
 	return 0;
 
 err_close:
-	mlx4_close_hca(dev);
+	mlx4_CLOSE_HCA(dev, 0);
 
 err_free_icm:
 	mlx4_free_icms(dev);
@@ -1070,18 +1073,12 @@
 		goto err_disable_pdev;
 	}
 
-	err = pci_request_region(pdev, 0, DRV_NAME);
+	err = pci_request_regions(pdev, DRV_NAME);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
+		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
 		goto err_disable_pdev;
 	}
 
-	err = pci_request_region(pdev, 2, DRV_NAME);
-	if (err) {
-		dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
-		goto err_release_bar0;
-	}
-
 	pci_set_master(pdev);
 
 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1090,7 +1087,7 @@
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
 			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
-			goto err_release_bar2;
+			goto err_release_regions;
 		}
 	}
 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1101,7 +1098,7 @@
 		if (err) {
 			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
 				"aborting.\n");
-			goto err_release_bar2;
+			goto err_release_regions;
 		}
 	}
 
@@ -1110,7 +1107,7 @@
 		dev_err(&pdev->dev, "Device struct alloc failed, "
 			"aborting.\n");
 		err = -ENOMEM;
-		goto err_release_bar2;
+		goto err_release_regions;
 	}
 
 	dev       = &priv->dev;
@@ -1205,11 +1202,8 @@
 err_free_dev:
 	kfree(priv);
 
-err_release_bar2:
-	pci_release_region(pdev, 2);
-
-err_release_bar0:
-	pci_release_region(pdev, 0);
+err_release_regions:
+	pci_release_regions(pdev);
 
 err_disable_pdev:
 	pci_disable_device(pdev);
@@ -1265,8 +1259,7 @@
 			pci_disable_msix(pdev);
 
 		kfree(priv);
-		pci_release_region(pdev, 2);
-		pci_release_region(pdev, 0);
+		pci_release_regions(pdev);
 		pci_disable_device(pdev);
 		pci_set_drvdata(pdev, NULL);
 	}
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index 6053c35..5ccbce9 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 5bd79c2..bc72d6e 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -205,9 +205,7 @@
 	void __iomem	      **uar_map;
 	u32			clr_mask;
 	struct mlx4_eq	       *eq;
-	u64			icm_virt;
-	struct page	       *icm_page;
-	dma_addr_t		icm_dma;
+	struct mlx4_icm_table	table;
 	struct mlx4_icm_table	cmpt_table;
 	int			have_irq;
 	u8			inta_pin;
@@ -373,9 +371,6 @@
 		      struct mlx4_dev_cap *dev_cap,
 		      struct mlx4_init_hca_param *init_hca);
 
-int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt);
-void mlx4_unmap_eq_icm(struct mlx4_dev *dev);
-
 int mlx4_cmd_init(struct mlx4_dev *dev);
 void mlx4_cmd_cleanup(struct mlx4_dev *dev);
 void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index f96948b..ca7ab8e 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -32,7 +32,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 
 #include <linux/mlx4/cmd.h>
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index 26d1a7a..c4988d6 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 
 #include <asm/page.h>
diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
index bd22df9..ca25b9d 100644
--- a/drivers/net/mlx4/profile.c
+++ b/drivers/net/mlx4/profile.c
@@ -32,8 +32,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
-
 #include "mlx4.h"
 #include "fw.h"
 
diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c
index 1c565ef..42ab9fc 100644
--- a/drivers/net/mlx4/qp.c
+++ b/drivers/net/mlx4/qp.c
@@ -33,8 +33,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
-
 #include <linux/mlx4/cmd.h>
 #include <linux/mlx4/qp.h>
 
diff --git a/drivers/net/mlx4/reset.c b/drivers/net/mlx4/reset.c
index 3951b88..e5741da 100644
--- a/drivers/net/mlx4/reset.c
+++ b/drivers/net/mlx4/reset.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c
index fe9f218..1377d0d 100644
--- a/drivers/net/mlx4/srq.c
+++ b/drivers/net/mlx4/srq.c
@@ -31,8 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
-
 #include <linux/mlx4/cmd.h>
 
 #include "mlx4.h"
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 42b6c63..87214a2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -130,17 +130,10 @@
 static int tun_attach(struct tun_struct *tun, struct file *file)
 {
 	struct tun_file *tfile = file->private_data;
-	const struct cred *cred = current_cred();
 	int err;
 
 	ASSERT_RTNL();
 
-	/* Check permissions */
-	if (((tun->owner != -1 && cred->euid != tun->owner) ||
-	     (tun->group != -1 && !in_egroup_p(tun->group))) &&
-		!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
 	netif_tx_lock_bh(tun->dev);
 
 	err = -EINVAL;
@@ -926,6 +919,8 @@
 
 	dev = __dev_get_by_name(net, ifr->ifr_name);
 	if (dev) {
+		const struct cred *cred = current_cred();
+
 		if (ifr->ifr_flags & IFF_TUN_EXCL)
 			return -EBUSY;
 		if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
@@ -935,6 +930,14 @@
 		else
 			return -EINVAL;
 
+		if (((tun->owner != -1 && cred->euid != tun->owner) ||
+		     (tun->group != -1 && !in_egroup_p(tun->group))) &&
+		    !capable(CAP_NET_ADMIN))
+			return -EPERM;
+		err = security_tun_dev_attach(tun->sk);
+		if (err < 0)
+			return err;
+
 		err = tun_attach(tun, file);
 		if (err < 0)
 			return err;
@@ -947,6 +950,9 @@
 
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
+		err = security_tun_dev_create();
+		if (err < 0)
+			return err;
 
 		/* Set dev type */
 		if (ifr->ifr_flags & IFF_TUN) {
@@ -989,6 +995,8 @@
 		tun->sk = sk;
 		container_of(sk, struct tun_sock, sk)->tun = tun;
 
+		security_tun_dev_post_create(sk);
+
 		tun_net_init(dev);
 
 		if (strchr(dev->name, '%')) {
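
Note: the owner/group check moves from tun_attach() to tun_set_iff(), so it is enforced where a file descriptor attaches to an existing device, and three LSM hooks are threaded through the device lifetime. The resulting ordering (summary, not code from the patch):

	/*
	 * existing device:  owner/group/CAP_NET_ADMIN check
	 *                   -> security_tun_dev_attach(tun->sk)
	 *                   -> tun_attach()
	 * new device:       CAP_NET_ADMIN check
	 *                   -> security_tun_dev_create()
	 *                   -> allocate netdev and socket
	 *                   -> security_tun_dev_post_create(sk)
	 */
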
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 3f62dd5..e109da4 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -669,14 +669,14 @@
  * memory and 2) dasd_smalloc_request uses the static ccw memory
  * that gets allocated for each device.
  */
-struct dasd_ccw_req *dasd_kmalloc_request(char *magic, int cplength,
+struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength,
 					  int datasize,
 					  struct dasd_device *device)
 {
 	struct dasd_ccw_req *cqr;
 
 	/* Sanity checks */
-	BUG_ON( magic == NULL || datasize > PAGE_SIZE ||
+	BUG_ON(datasize > PAGE_SIZE ||
 	     (cplength*sizeof(struct ccw1)) > PAGE_SIZE);
 
 	cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC);
@@ -700,14 +700,13 @@
 			return ERR_PTR(-ENOMEM);
 		}
 	}
-	strncpy((char *) &cqr->magic, magic, 4);
-	ASCEBC((char *) &cqr->magic, 4);
+	cqr->magic = magic;
 	set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
 	dasd_get_device(device);
 	return cqr;
 }
 
-struct dasd_ccw_req *dasd_smalloc_request(char *magic, int cplength,
+struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength,
 					  int datasize,
 					  struct dasd_device *device)
 {
@@ -717,7 +716,7 @@
 	int size;
 
 	/* Sanity checks */
-	BUG_ON( magic == NULL || datasize > PAGE_SIZE ||
+	BUG_ON(datasize > PAGE_SIZE ||
 	     (cplength*sizeof(struct ccw1)) > PAGE_SIZE);
 
 	size = (sizeof(struct dasd_ccw_req) + 7L) & -8L;
@@ -744,8 +743,7 @@
 		cqr->data = data;
  		memset(cqr->data, 0, datasize);
 	}
-	strncpy((char *) &cqr->magic, magic, 4);
-	ASCEBC((char *) &cqr->magic, 4);
+	cqr->magic = magic;
 	set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
 	dasd_get_device(device);
 	return cqr;
@@ -899,9 +897,6 @@
 	switch (rc) {
 	case 0:
 		cqr->status = DASD_CQR_IN_IO;
-		DBF_DEV_EVENT(DBF_DEBUG, device,
-			      "start_IO: request %p started successful",
-			      cqr);
 		break;
 	case -EBUSY:
 		DBF_DEV_EVENT(DBF_DEBUG, device, "%s",
@@ -1699,8 +1694,11 @@
 	 * for that. State DASD_STATE_ONLINE is normal block device
 	 * operation.
 	 */
-	if (basedev->state < DASD_STATE_READY)
+	if (basedev->state < DASD_STATE_READY) {
+		while ((req = blk_fetch_request(block->request_queue)))
+			__blk_end_request_all(req, -EIO);
 		return;
+	}
 	/* Now we try to fetch requests from the request queue */
 	while (!blk_queue_plugged(queue) && (req = blk_peek_request(queue))) {
 		if (basedev->features & DASD_FEATURE_READONLY &&
@@ -2530,7 +2528,7 @@
 static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
 						   void *rdc_buffer,
 						   int rdc_buffer_size,
-						   char *magic)
+						   int magic)
 {
 	struct dasd_ccw_req *cqr;
 	struct ccw1 *ccw;
@@ -2561,7 +2559,7 @@
 }
 
 
-int dasd_generic_read_dev_chars(struct dasd_device *device, char *magic,
+int dasd_generic_read_dev_chars(struct dasd_device *device, int magic,
 				void *rdc_buffer, int rdc_buffer_size)
 {
 	int ret;
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 27991b6..e8ff7b0 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -7,7 +7,7 @@
  *
  */
 
-#define KMSG_COMPONENT "dasd"
+#define KMSG_COMPONENT "dasd-eckd"
 
 #include <linux/timer.h>
 #include <linux/slab.h>
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 5b7bbc8..70a008c 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -5,7 +5,7 @@
  * Author(s): Stefan Weinhuber <wein@de.ibm.com>
  */
 
-#define KMSG_COMPONENT "dasd"
+#define KMSG_COMPONENT "dasd-eckd"
 
 #include <linux/list.h>
 #include <asm/ebcdic.h>
@@ -379,8 +379,7 @@
 	int rc;
 	unsigned long flags;
 
-	cqr = dasd_kmalloc_request("ECKD",
-				   1 /* PSF */	+ 1 /* RSSD */ ,
+	cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
 				   (sizeof(struct dasd_psf_prssd_data)),
 				   device);
 	if (IS_ERR(cqr))
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 644086b..4e49b4a 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -8,7 +8,7 @@
  *
  */
 
-#define KMSG_COMPONENT "dasd"
+#define KMSG_COMPONENT "dasd-diag"
 
 #include <linux/stddef.h>
 #include <linux/kernel.h>
@@ -523,8 +523,7 @@
 	/* Build the request */
 	datasize = sizeof(struct dasd_diag_req) +
 		count*sizeof(struct dasd_diag_bio);
-	cqr = dasd_smalloc_request(dasd_diag_discipline.name, 0,
-				   datasize, memdev);
+	cqr = dasd_smalloc_request(DASD_DIAG_MAGIC, 0, datasize, memdev);
 	if (IS_ERR(cqr))
 		return cqr;
 
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index c11770f..a1ce573 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -10,7 +10,7 @@
  * Author.........: Nigel Hislop <hislop_nigel@emc.com>
  */
 
-#define KMSG_COMPONENT "dasd"
+#define KMSG_COMPONENT "dasd-eckd"
 
 #include <linux/stddef.h>
 #include <linux/kernel.h>
@@ -730,7 +730,8 @@
 	struct dasd_ccw_req *cqr;
 	struct ccw1 *ccw;
 
-	cqr = dasd_smalloc_request("ECKD", 1 /* RCD */, ciw->count, device);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* RCD */, ciw->count,
+				   device);
 
 	if (IS_ERR(cqr)) {
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
@@ -934,8 +935,7 @@
 	struct dasd_eckd_private *private;
 
 	private = (struct dasd_eckd_private *) device->private;
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   1 /* PSF */	+ 1 /* RSSD */ ,
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
 				   (sizeof(struct dasd_psf_prssd_data) +
 				    sizeof(struct dasd_rssd_features)),
 				   device);
@@ -998,7 +998,7 @@
 	struct dasd_psf_ssc_data *psf_ssc_data;
 	struct ccw1 *ccw;
 
-	cqr = dasd_smalloc_request("ECKD", 1 /* PSF */ ,
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */,
 				  sizeof(struct dasd_psf_ssc_data),
 				  device);
 
@@ -1149,8 +1149,8 @@
 		goto out_err3;
 
 	/* Read Device Characteristics */
-	rc = dasd_generic_read_dev_chars(device, "ECKD", &private->rdc_data,
-					 64);
+	rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
+					 &private->rdc_data, 64);
 	if (rc) {
 		DBF_EVENT(DBF_WARNING,
 			  "Read device characteristics failed, rc=%d for "
@@ -1217,8 +1217,7 @@
 
 	cplength = 8;
 	datasize = sizeof(struct DE_eckd_data) + 2*sizeof(struct LO_eckd_data);
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   cplength, datasize, device);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device);
 	if (IS_ERR(cqr))
 		return cqr;
 	ccw = cqr->cpaddr;
@@ -1499,8 +1498,7 @@
 		return ERR_PTR(-EINVAL);
 	}
 	/* Allocate the format ccw request. */
-	fcp = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   cplength, datasize, device);
+	fcp = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device);
 	if (IS_ERR(fcp))
 		return fcp;
 
@@ -1783,8 +1781,8 @@
 		datasize += count*sizeof(struct LO_eckd_data);
 	}
 	/* Allocate the ccw request. */
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   cplength, datasize, startdev);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize,
+				   startdev);
 	if (IS_ERR(cqr))
 		return cqr;
 	ccw = cqr->cpaddr;
@@ -1948,8 +1946,8 @@
 		cidaw * sizeof(unsigned long long);
 
 	/* Allocate the ccw request. */
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   cplength, datasize, startdev);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize,
+				   startdev);
 	if (IS_ERR(cqr))
 		return cqr;
 	ccw = cqr->cpaddr;
@@ -2249,8 +2247,7 @@
 
 	/* Allocate the ccw request. */
 	itcw_size = itcw_calc_size(0, ctidaw, 0);
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   0, itcw_size, startdev);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev);
 	if (IS_ERR(cqr))
 		return cqr;
 
@@ -2557,8 +2554,7 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   1, 32, device);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
 	if (IS_ERR(cqr)) {
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
 			    "Could not allocate initialization request");
@@ -2600,8 +2596,7 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   1, 32, device);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
 	if (IS_ERR(cqr)) {
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
 			    "Could not allocate initialization request");
@@ -2642,8 +2637,7 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   1, 32, device);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
 	if (IS_ERR(cqr)) {
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
 			    "Could not allocate initialization request");
@@ -2681,8 +2675,7 @@
 	struct ccw1 *ccw;
 	int rc;
 
-	cqr = dasd_smalloc_request(dasd_eckd_discipline.name,
-				   1 /* PSF */  + 1 /* RSSD */ ,
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
 				   (sizeof(struct dasd_psf_prssd_data) +
 				    sizeof(struct dasd_rssd_perf_stats_t)),
 				   device);
@@ -2828,7 +2821,7 @@
 	}
 
 	/* setup CCWs for PSF + RSSD */
-	cqr = dasd_smalloc_request("ECKD", 2 , 0, device);
+	cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 2, 0, device);
 	if (IS_ERR(cqr)) {
 		DBF_DEV_EVENT(DBF_WARNING, device, "%s",
 			"Could not allocate initialization request");
@@ -3254,7 +3247,7 @@
 
 	/* Read Device Characteristics */
 	memset(&private->rdc_data, 0, sizeof(private->rdc_data));
-	rc = dasd_generic_read_dev_chars(device, "ECKD",
+	rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC,
 					 &private->rdc_data, 64);
 	if (rc) {
 		DBF_EVENT(DBF_WARNING,
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index c24c8c3..d96039e 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -6,7 +6,7 @@
  *  Author(s): Stefan Weinhuber <wein@de.ibm.com>
  */
 
-#define KMSG_COMPONENT "dasd"
+#define KMSG_COMPONENT "dasd-eckd"
 
 #include <linux/init.h>
 #include <linux/fs.h>
@@ -464,7 +464,7 @@
 	if (!device->discipline || strcmp(device->discipline->name, "ECKD"))
 		return -EPERM;	/* FIXME: -EMEDIUMTYPE ? */
 
-	cqr = dasd_kmalloc_request("ECKD", 1 /* SNSS */,
+	cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* SNSS */,
 				   SNSS_DATA_SIZE, device);
 	if (IS_ERR(cqr))
 		return -ENOMEM;
diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c
index cb8f9ce..7656384 100644
--- a/drivers/s390/block/dasd_erp.c
+++ b/drivers/s390/block/dasd_erp.c
@@ -99,8 +99,8 @@
 		cqr->lpm    = LPM_ANYPATH;
 		cqr->status = DASD_CQR_FILLED;
         } else {
-		dev_err(&device->cdev->dev,
-			"default ERP has run out of retries and failed\n");
+		pr_err("%s: default ERP has run out of retries and failed\n",
+		       dev_name(&device->cdev->dev));
 		cqr->status = DASD_CQR_FAILED;
 		cqr->stopclk = get_clock();
         }
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 31849ad..f245377 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -5,7 +5,7 @@
  * Copyright IBM Corp. 1999, 2009
  */
 
-#define KMSG_COMPONENT "dasd"
+#define KMSG_COMPONENT "dasd-fba"
 
 #include <linux/stddef.h>
 #include <linux/kernel.h>
@@ -152,8 +152,8 @@
 	block->base = device;
 
 	/* Read Device Characteristics */
-	rc = dasd_generic_read_dev_chars(device, "FBA ", &private->rdc_data,
-					 32);
+	rc = dasd_generic_read_dev_chars(device, DASD_FBA_MAGIC,
+					 &private->rdc_data, 32);
 	if (rc) {
 		DBF_EVENT(DBF_WARNING, "Read device characteristics returned "
 			  "error %d for device: %s",
@@ -305,8 +305,7 @@
 		datasize += (count - 1)*sizeof(struct LO_fba_data);
 	}
 	/* Allocate the ccw request. */
-	cqr = dasd_smalloc_request(dasd_fba_discipline.name,
-				   cplength, datasize, memdev);
+	cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev);
 	if (IS_ERR(cqr))
 		return cqr;
 	ccw = cqr->cpaddr;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index b699ca3..5e47a1e 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -59,6 +59,11 @@
 #include <asm/dasd.h>
 #include <asm/idals.h>
 
+/* DASD discipline magic: the discipline name encoded in EBCDIC */
+#define DASD_ECKD_MAGIC 0xC5C3D2C4
+#define DASD_DIAG_MAGIC 0xC4C9C1C7
+#define DASD_FBA_MAGIC 0xC6C2C140
+
 /*
  * SECTION: Type definitions
  */
@@ -540,9 +545,9 @@
 extern struct kmem_cache *dasd_page_cache;
 
 struct dasd_ccw_req *
-dasd_kmalloc_request(char *, int, int, struct dasd_device *);
+dasd_kmalloc_request(int, int, int, struct dasd_device *);
 struct dasd_ccw_req *
-dasd_smalloc_request(char *, int, int, struct dasd_device *);
+dasd_smalloc_request(int, int, int, struct dasd_device *);
 void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *);
 void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *);
 
@@ -587,7 +592,7 @@
 int dasd_generic_pm_freeze(struct ccw_device *);
 int dasd_generic_restore_device(struct ccw_device *);
 
-int dasd_generic_read_dev_chars(struct dasd_device *, char *, void *, int);
+int dasd_generic_read_dev_chars(struct dasd_device *, int, void *, int);
 char *dasd_get_sense(struct irb *);
 
 /* externals in dasd_devmap.c */
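
Note: the new magic values are the old discipline name strings pre-encoded in EBCDIC; the strncpy()/ASCEBC() pairs removed from dasd.c used to perform that conversion at run time. A stand-alone check of the encoding (user-space sketch, not part of the patch):

	#include <assert.h>

	/* EBCDIC code points for the characters the disciplines use */
	enum { EB_A = 0xC1, EB_B = 0xC2, EB_C = 0xC3, EB_D = 0xC4,
	       EB_E = 0xC5, EB_F = 0xC6, EB_G = 0xC7, EB_I = 0xC9,
	       EB_K = 0xD2, EB_SP = 0x40 };

	#define MK_MAGIC(a, b, c, d) \
		(((unsigned)(a) << 24) | ((unsigned)(b) << 16) | \
		 ((unsigned)(c) << 8) | (unsigned)(d))

	int main(void)
	{
		assert(MK_MAGIC(EB_E, EB_C, EB_K, EB_D)  == 0xC5C3D2C4); /* "ECKD" */
		assert(MK_MAGIC(EB_D, EB_I, EB_A, EB_G)  == 0xC4C9C1C7); /* "DIAG" */
		assert(MK_MAGIC(EB_F, EB_B, EB_A, EB_SP) == 0xC6C2C140); /* "FBA " */
		return 0;
	}
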
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index df918ef..f756a1b 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -98,8 +98,8 @@
 	if (!capable (CAP_SYS_ADMIN))
 		return -EACCES;
 
-	dev_info(&base->cdev->dev, "The DASD has been put in the quiesce "
-		 "state\n");
+	pr_info("%s: The DASD has been put in the quiesce "
+		"state\n", dev_name(&base->cdev->dev));
 	spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
 	base->stopped |= DASD_STOPPED_QUIESCE;
 	spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
@@ -119,8 +119,8 @@
 	if (!capable (CAP_SYS_ADMIN))
 		return -EACCES;
 
-	dev_info(&base->cdev->dev, "I/O operations have been resumed "
-		 "on the DASD\n");
+	pr_info("%s: I/O operations have been resumed "
+		"on the DASD\n", dev_name(&base->cdev->dev));
 	spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
 	base->stopped &= ~DASD_STOPPED_QUIESCE;
 	spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
@@ -146,8 +146,8 @@
 		return -EPERM;
 
 	if (base->state != DASD_STATE_BASIC) {
-		dev_warn(&base->cdev->dev,
-			 "The DASD cannot be formatted while it is enabled\n");
+		pr_warning("%s: The DASD cannot be formatted while it is "
+			   "enabled\n",  dev_name(&base->cdev->dev));
 		return -EBUSY;
 	}
 
@@ -175,9 +175,9 @@
 		dasd_sfree_request(cqr, cqr->memdev);
 		if (rc) {
 			if (rc != -ERESTARTSYS)
-				dev_err(&base->cdev->dev,
-					"Formatting unit %d failed with "
-					"rc=%d\n", fdata->start_unit, rc);
+				pr_err("%s: Formatting unit %d failed with "
+				       "rc=%d\n", dev_name(&base->cdev->dev),
+				       fdata->start_unit, rc);
 			return rc;
 		}
 		fdata->start_unit++;
@@ -204,9 +204,9 @@
 	if (copy_from_user(&fdata, argp, sizeof(struct format_data_t)))
 		return -EFAULT;
 	if (bdev != bdev->bd_contains) {
-		dev_warn(&block->base->cdev->dev,
-			 "The specified DASD is a partition and cannot be "
-			 "formatted\n");
+		pr_warning("%s: The specified DASD is a partition and cannot "
+			   "be formatted\n",
+			   dev_name(&block->base->cdev->dev));
 		return -EINVAL;
 	}
 	return dasd_format(block, &fdata);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index db442cd..ee604e9 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -42,7 +42,6 @@
 #include <linux/suspend.h>
 #include <linux/platform_device.h>
 #include <asm/uaccess.h>
-#include <asm/checksum.h>
 
 #define XPRAM_NAME	"xpram"
 #define XPRAM_DEVS	1	/* one partition */
@@ -51,7 +50,6 @@
 typedef struct {
 	unsigned int	size;		/* size of xpram segment in pages */
 	unsigned int	offset;		/* start page of xpram segment */
-	unsigned int	csum;		/* partition checksum for suspend */
 } xpram_device_t;
 
 static xpram_device_t xpram_devices[XPRAM_MAX_DEVS];
@@ -387,58 +385,6 @@
 }
 
 /*
- * Save checksums for all partitions.
- */
-static int xpram_save_checksums(void)
-{
-	unsigned long mem_page;
-	int rc, i;
-
-	rc = 0;
-	mem_page = (unsigned long) __get_free_page(GFP_KERNEL);
-	if (!mem_page)
-		return -ENOMEM;
-	for (i = 0; i < xpram_devs; i++) {
-		rc = xpram_page_in(mem_page, xpram_devices[i].offset);
-		if (rc)
-			goto fail;
-		xpram_devices[i].csum = csum_partial((const void *) mem_page,
-						     PAGE_SIZE, 0);
-	}
-fail:
-	free_page(mem_page);
-	return rc ? -ENXIO : 0;
-}
-
-/*
- * Verify checksums for all partitions.
- */
-static int xpram_validate_checksums(void)
-{
-	unsigned long mem_page;
-	unsigned int csum;
-	int rc, i;
-
-	rc = 0;
-	mem_page = (unsigned long) __get_free_page(GFP_KERNEL);
-	if (!mem_page)
-		return -ENOMEM;
-	for (i = 0; i < xpram_devs; i++) {
-		rc = xpram_page_in(mem_page, xpram_devices[i].offset);
-		if (rc)
-			goto fail;
-		csum = csum_partial((const void *) mem_page, PAGE_SIZE, 0);
-		if (xpram_devices[i].csum != csum) {
-			rc = -EINVAL;
-			goto fail;
-		}
-	}
-fail:
-	free_page(mem_page);
-	return rc ? -ENXIO : 0;
-}
-
-/*
  * Resume failed: Print error message and call panic.
  */
 static void xpram_resume_error(const char *message)
@@ -458,21 +404,10 @@
 		xpram_resume_error("xpram disappeared");
 	if (xpram_pages != xpram_highest_page_index() + 1)
 		xpram_resume_error("Size of xpram changed");
-	if (xpram_validate_checksums())
-		xpram_resume_error("Data of xpram changed");
 	return 0;
 }
 
-/*
- * Save necessary state in suspend.
- */
-static int xpram_freeze(struct device *dev)
-{
-	return xpram_save_checksums();
-}
-
 static struct dev_pm_ops xpram_pm_ops = {
-	.freeze		= xpram_freeze,
 	.restore	= xpram_restore,
 };
 
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index 0769ced..4e34d36 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -82,6 +82,16 @@
 	  You should only select this option if you know what you are doing,
 	  need this feature and intend to run your kernel in LPAR.
 
+config SCLP_ASYNC
+	tristate "Support for Call Home via Asynchronous SCLP Records"
+	depends on S390
+	help
+	  This option enables the call home function, which is able to inform
+	  the service element and connected organisations about a kernel panic.
+	  You should only select this option if you know what you are doing,
+	  want to inform other people about your kernel panics,
+	  need this feature and intend to run your kernel in LPAR.
+
 config S390_TAPE
 	tristate "S/390 tape device support"
 	depends on CCW
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 7e73e39..efb500a 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -16,6 +16,7 @@
 obj-$(CONFIG_SCLP_CONSOLE) += sclp_con.o
 obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o
 obj-$(CONFIG_SCLP_CPI) += sclp_cpi.o
+obj-$(CONFIG_SCLP_ASYNC) += sclp_async.o
 
 obj-$(CONFIG_ZVM_WATCHDOG) += vmwatchdog.o
 obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o
diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c
index 3234e90..89ece1c 100644
--- a/drivers/s390/char/monreader.c
+++ b/drivers/s390/char/monreader.c
@@ -581,7 +581,7 @@
 	monreader_device->release = (void (*)(struct device *))kfree;
 	rc = device_register(monreader_device);
 	if (rc) {
-		kfree(monreader_device);
+		put_device(monreader_device);
 		goto out_driver;
 	}
 
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index 60e7cb0..6bb5a6b 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -27,6 +27,7 @@
 #define EVTYP_VT220MSG		0x1A
 #define EVTYP_CONFMGMDATA	0x04
 #define EVTYP_SDIAS		0x1C
+#define EVTYP_ASYNC		0x0A
 
 #define EVTYP_OPCMD_MASK	0x80000000
 #define EVTYP_MSG_MASK		0x40000000
@@ -38,6 +39,7 @@
 #define EVTYP_VT220MSG_MASK	0x00000040
 #define EVTYP_CONFMGMDATA_MASK	0x10000000
 #define EVTYP_SDIAS_MASK	0x00000010
+#define EVTYP_ASYNC_MASK	0x00400000
 
 #define GNRLMSGFLGS_DOM		0x8000
 #define GNRLMSGFLGS_SNDALRM	0x4000
@@ -85,12 +87,12 @@
 } __attribute__((packed));
 
 extern u64 sclp_facilities;
-
 #define SCLP_HAS_CHP_INFO	(sclp_facilities & 0x8000000000000000ULL)
 #define SCLP_HAS_CHP_RECONFIG	(sclp_facilities & 0x2000000000000000ULL)
 #define SCLP_HAS_CPU_INFO	(sclp_facilities & 0x0800000000000000ULL)
 #define SCLP_HAS_CPU_RECONFIG	(sclp_facilities & 0x0400000000000000ULL)
 
+
 struct gds_subvector {
 	u8	length;
 	u8	key;
diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c
new file mode 100644
index 0000000..daaec18
--- /dev/null
+++ b/drivers/s390/char/sclp_async.c
@@ -0,0 +1,224 @@
+/*
+ * Enable Asynchronous Notification via SCLP.
+ *
+ * Copyright IBM Corp. 2009
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/kmod.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+#include <linux/utsname.h>
+#include "sclp.h"
+
+static int callhome_enabled;
+static struct sclp_req *request;
+static struct sclp_async_sccb *sccb;
+static int sclp_async_send_wait(char *message);
+static struct ctl_table_header *callhome_sysctl_header;
+static DEFINE_SPINLOCK(sclp_async_lock);
+static char nodename[64];
+#define SCLP_NORMAL_WRITE	0x00
+
+struct async_evbuf {
+	struct evbuf_header header;
+	u64 reserved;
+	u8 rflags;
+	u8 empty;
+	u8 rtype;
+	u8 otype;
+	char comp_id[12];
+	char data[3000]; /* there is still some space left */
+} __attribute__((packed));
+
+struct sclp_async_sccb {
+	struct sccb_header header;
+	struct async_evbuf evbuf;
+} __attribute__((packed));
+
+static struct sclp_register sclp_async_register = {
+	.send_mask = EVTYP_ASYNC_MASK,
+};
+
+static int call_home_on_panic(struct notifier_block *self,
+			      unsigned long event, void *data)
+{
+	strncat(data, nodename, strlen(nodename));
+	sclp_async_send_wait(data);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block call_home_panic_nb = {
+	.notifier_call = call_home_on_panic,
+	.priority = INT_MAX,
+};
+
+static int proc_handler_callhome(ctl_table *ctl, int write, struct file *filp,
+				 void __user *buffer, size_t *count,
+				 loff_t *ppos)
+{
+	unsigned long val;
+	int len, rc;
+	char buf[3];	/* "0\n" or "1\n" plus terminating NUL */
+
+	if (!*count || (*ppos && !write)) {
+		*count = 0;
+		return 0;
+	}
+	if (!write) {
+		len = snprintf(buf, sizeof(buf), "%d\n", callhome_enabled);
+		rc = copy_to_user(buffer, buf, len);
+		if (rc != 0)
+			return -EFAULT;
+	} else {
+		len = min_t(size_t, *count, sizeof(buf) - 1);
+		rc = copy_from_user(buf, buffer, len);
+		if (rc != 0)
+			return -EFAULT;
+		buf[len] = '\0';
+		if (strict_strtoul(buf, 0, &val) != 0)
+			return -EINVAL;
+		if (val != 0 && val != 1)
+			return -EINVAL;
+		callhome_enabled = val;
+	}
+	*count = len;
+	*ppos += len;
+	return 0;
+}
+
+static struct ctl_table callhome_table[] = {
+	{
+		.procname	= "callhome",
+		.mode		= 0644,
+		.proc_handler	= &proc_handler_callhome,
+	},
+	{ .ctl_name = 0 }
+};
+
+static struct ctl_table kern_dir_table[] = {
+	{
+		.ctl_name	= CTL_KERN,
+		.procname	= "kernel",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= callhome_table,
+	},
+	{ .ctl_name = 0 }
+};
+
+/*
+ * Transfer an asynchronous notification record and wait
+ * for the send to complete.
+ */
+static int sclp_async_send_wait(char *message)
+{
+	struct async_evbuf *evb;
+	int rc;
+	unsigned long flags;
+
+	if (!callhome_enabled)
+		return 0;
+	sccb->evbuf.header.type = EVTYP_ASYNC;
+	sccb->evbuf.rtype = 0xA5;
+	sccb->evbuf.otype = 0x00;
+	evb = &sccb->evbuf;
+	request->command = SCLP_CMDW_WRITE_EVENT_DATA;
+	request->sccb = sccb;
+	request->status = SCLP_REQ_FILLED;
+	strncpy(sccb->evbuf.data, message, sizeof(sccb->evbuf.data));
+	/*
+	 * Retain Queue
+	 * e.g. 5639CC140 500 Red Hat RHEL5 Linux for zSeries (RHEL AS)
+	 */
+	strncpy(sccb->evbuf.comp_id, "000000000", sizeof(sccb->evbuf.comp_id));
+	sccb->evbuf.header.length = sizeof(sccb->evbuf);
+	sccb->header.length = sizeof(sccb->evbuf) + sizeof(sccb->header);
+	sccb->header.function_code = SCLP_NORMAL_WRITE;
+	rc = sclp_add_request(request);
+	if (rc)
+		return rc;
+	spin_lock_irqsave(&sclp_async_lock, flags);
+	while (request->status != SCLP_REQ_DONE &&
+		request->status != SCLP_REQ_FAILED) {
+		 sclp_sync_wait();
+	}
+	spin_unlock_irqrestore(&sclp_async_lock, flags);
+	if (request->status != SCLP_REQ_DONE)
+		return -EIO;
+	rc = ((struct sclp_async_sccb *)
+	       request->sccb)->header.response_code;
+	if (rc != 0x0020)
+		return -EIO;
+	if (evb->header.flags != 0x80)
+		return -EIO;
+	return rc;
+}
+
+static int __init sclp_async_init(void)
+{
+	int rc;
+
+	rc = sclp_register(&sclp_async_register);
+	if (rc)
+		return rc;
+	callhome_sysctl_header = register_sysctl_table(kern_dir_table);
+	if (!callhome_sysctl_header) {
+		rc = -ENOMEM;
+		goto out_sclp;
+	}
+	if (!(sclp_async_register.sclp_receive_mask & EVTYP_ASYNC_MASK)) {
+		rc = -EOPNOTSUPP;
+		goto out_sys;
+	}
+	rc = -ENOMEM;
+	request = kzalloc(sizeof(struct sclp_req), GFP_KERNEL);
+	if (!request)
+		goto out_sys;
+	sccb = (struct sclp_async_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!sccb)
+		goto out_mem;
+	rc = atomic_notifier_chain_register(&panic_notifier_list,
+					     &call_home_panic_nb);
+	if (rc)
+		goto out_mem;
+
+	strlcpy(nodename, init_utsname()->nodename, sizeof(nodename));
+	return 0;
+
+out_mem:
+	kfree(request);
+	free_page((unsigned long) sccb);
+out_sys:
+	unregister_sysctl_table(callhome_sysctl_header);
+out_sclp:
+	sclp_unregister(&sclp_async_register);
+	return rc;
+
+}
+module_init(sclp_async_init);
+
+static void __exit sclp_async_exit(void)
+{
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+					 &call_home_panic_nb);
+	unregister_sysctl_table(callhome_sysctl_header);
+	sclp_unregister(&sclp_async_register);
+	free_page((unsigned long) sccb);
+	kfree(request);
+}
+module_exit(sclp_async_exit);
+
+MODULE_AUTHOR("Copyright IBM Corp. 2009");
+MODULE_AUTHOR("Hans-Joachim Picht <hans@linux.vnet.ibm.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SCLP Asynchronous Notification Records");
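
Note: callhome_enabled defaults to 0, so the feature stays off until it is switched on through the new sysctl, e.g. "echo 1 > /proc/sys/kernel/callhome"; reading the file returns the current state. On a panic, call_home_on_panic() appends the node name to the panic message and ships it as an asynchronous SCLP record via sclp_async_send_wait().
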
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index 5a519fa..2fe45ff 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -8,7 +8,7 @@
  *		 Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
-#define KMSG_COMPONENT "tape"
+#define KMSG_COMPONENT "tape_34xx"
 
 #include <linux/module.h>
 #include <linux/init.h>
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index 418f72d..e4cc3aa 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -8,7 +8,7 @@
  *		 Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
-#define KMSG_COMPONENT "tape"
+#define KMSG_COMPONENT "tape_3590"
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -39,8 +39,6 @@
  * - Read Alternate:		 implemented
  *******************************************************************/
 
-#define KMSG_COMPONENT "tape"
-
 static const char *tape_3590_msg[TAPE_3590_MAX_MSG] = {
 	[0x00] = "",
 	[0x10] = "Lost Sense",
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 47ff695..4cb9e70 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -302,8 +302,6 @@
 	if (!device->blk_data.medium_changed)
 		return 0;
 
-	dev_info(&device->cdev->dev, "Determining the size of the recorded "
-		"area...\n");
 	rc = tape_mtop(device, MTFSFM, 1);
 	if (rc)
 		return rc;
@@ -312,6 +310,8 @@
 	if (rc < 0)
 		return rc;
 
+	pr_info("%s: Determining the size of the recorded area...\n",
+		dev_name(&device->cdev->dev));
 	DBF_LH(3, "Image file ends at %d\n", rc);
 	nr_of_blks = rc;
 
@@ -330,8 +330,8 @@
 	device->bof = rc;
 	nr_of_blks -= rc;
 
-	dev_info(&device->cdev->dev, "The size of the recorded area is %i "
-		"blocks\n", nr_of_blks);
+	pr_info("%s: The size of the recorded area is %i blocks\n",
+		dev_name(&device->cdev->dev), nr_of_blks);
 	set_capacity(device->blk_data.disk,
 		nr_of_blks*(TAPEBLOCK_HSEC_SIZE/512));
 
@@ -366,8 +366,8 @@
 
 	if (device->required_tapemarks) {
 		DBF_EVENT(2, "TBLOCK: missing tapemarks\n");
-		dev_warn(&device->cdev->dev, "Opening the tape failed because"
-			" of missing end-of-file marks\n");
+		pr_warning("%s: Opening the tape failed because of missing "
+			   "end-of-file marks\n", dev_name(&device->cdev->dev));
 		rc = -EPERM;
 		goto put_device;
 	}
diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c
index 1d420d9..5cd31e0 100644
--- a/drivers/s390/char/tape_core.c
+++ b/drivers/s390/char/tape_core.c
@@ -214,13 +214,15 @@
 	switch(newstate){
 	case MS_UNLOADED:
 		device->tape_generic_status |= GMT_DR_OPEN(~0);
-		dev_info(&device->cdev->dev, "The tape cartridge has been "
-			"successfully unloaded\n");
+		if (device->medium_state == MS_LOADED)
+			pr_info("%s: The tape cartridge has been successfully "
+				"unloaded\n", dev_name(&device->cdev->dev));
 		break;
 	case MS_LOADED:
 		device->tape_generic_status &= ~GMT_DR_OPEN(~0);
-		dev_info(&device->cdev->dev, "A tape cartridge has been "
-			"mounted\n");
+		if (device->medium_state == MS_UNLOADED)
+			pr_info("%s: A tape cartridge has been mounted\n",
+				dev_name(&device->cdev->dev));
 		break;
 	default:
 		// print nothing
@@ -358,11 +360,11 @@
 
 out_char:
 	tapechar_cleanup_device(device);
+out_minor:
+	tape_remove_minor(device);
 out_discipline:
 	device->discipline->cleanup_device(device);
 	device->discipline = NULL;
-out_minor:
-	tape_remove_minor(device);
 out:
 	module_put(discipline->owner);
 	return rc;
@@ -654,8 +656,8 @@
 			 */
 			DBF_EVENT(3, "(%08x): Drive in use vanished!\n",
 				device->cdev_id);
-			dev_warn(&device->cdev->dev, "A tape unit was detached"
-				" while in use\n");
+			pr_warning("%s: A tape unit was detached while in "
+				   "use\n", dev_name(&device->cdev->dev));
 			tape_state_set(device, TS_NOT_OPER);
 			__tape_discard_requests(device);
 			spin_unlock_irq(get_ccwdev_lock(device->cdev));
diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c
index 1a9420b..750354a 100644
--- a/drivers/s390/char/tape_std.c
+++ b/drivers/s390/char/tape_std.c
@@ -68,7 +68,7 @@
 	 * to another host (actually this shouldn't happen but it does).
 	 * So we set up a timeout for this call.
 	 */
-	init_timer(&timeout);
+	init_timer_on_stack(&timeout);
 	timeout.function = tape_std_assign_timeout;
 	timeout.data     = (unsigned long) request;
 	timeout.expires  = jiffies + 2 * HZ;
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index c20a4fe..d1a142fa 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -765,8 +765,10 @@
 	} else
 		return -ENOMEM;
 	ret = device_register(dev);
-	if (ret)
+	if (ret) {
+		put_device(dev);
 		return ret;
+	}
 
 	ret = sysfs_create_group(&dev->kobj, &vmlogrdr_attr_group);
 	if (ret) {
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 31b902e..77571b6 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -1026,9 +1026,15 @@
 
 	debug_set_level(vmur_dbf, 6);
 
+	vmur_class = class_create(THIS_MODULE, "vmur");
+	if (IS_ERR(vmur_class)) {
+		rc = PTR_ERR(vmur_class);
+		goto fail_free_dbf;
+	}
+
 	rc = ccw_driver_register(&ur_driver);
 	if (rc)
-		goto fail_free_dbf;
+		goto fail_class_destroy;
 
 	rc = alloc_chrdev_region(&dev, 0, NUM_MINORS, "vmur");
 	if (rc) {
@@ -1038,18 +1044,13 @@
 	}
 	ur_first_dev_maj_min = MKDEV(MAJOR(dev), 0);
 
-	vmur_class = class_create(THIS_MODULE, "vmur");
-	if (IS_ERR(vmur_class)) {
-		rc = PTR_ERR(vmur_class);
-		goto fail_unregister_region;
-	}
 	pr_info("%s loaded.\n", ur_banner);
 	return 0;
 
-fail_unregister_region:
-	unregister_chrdev_region(ur_first_dev_maj_min, NUM_MINORS);
 fail_unregister_driver:
 	ccw_driver_unregister(&ur_driver);
+fail_class_destroy:
+	class_destroy(vmur_class);
 fail_free_dbf:
 	debug_unregister(vmur_dbf);
 	return rc;
@@ -1057,9 +1058,9 @@
 
 static void __exit ur_exit(void)
 {
-	class_destroy(vmur_class);
 	unregister_chrdev_region(ur_first_dev_maj_min, NUM_MINORS);
 	ccw_driver_unregister(&ur_driver);
+	class_destroy(vmur_class);
 	debug_unregister(vmur_dbf);
 	pr_info("%s unloaded.\n", ur_banner);
 }
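
Note: class_create() now runs first in ur_init(), so that setup and teardown are exact mirror images; both the error ladder and ur_exit() undo resources in reverse order of creation (summary):

	/*
	 * init:   class_create -> ccw_driver_register -> alloc_chrdev_region
	 * exit:   unregister_chrdev_region -> ccw_driver_unregister
	 *         -> class_destroy
	 * error:  each fail_* label undoes only what was set up before it.
	 */
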
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 1bbae43..c431198 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -275,7 +275,7 @@
 	u32 num_pages;
 	u32 pad1;
 	u64 tod;
-	cpuid_t cpu_id;
+	struct cpuid cpu_id;
 	u32 arch_id;
 	u32 volnr;
 	u32 build_arch;
diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index adb3dd3..fa4c966 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the S/390 common i/o drivers
 #
 
-obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o isc.o scsw.o \
+obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o isc.o \
 	fcx.o itcw.o crw.o
 ccw_device-objs += device.o device_fsm.o device_ops.o
 ccw_device-objs += device_id.o device_pgid.o device_status.o
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 3e5f304..4000283 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -417,7 +417,8 @@
 	if (ret) {
 		CIO_MSG_EVENT(0, "Could not register chp%x.%02x: %d\n",
 			      chpid.cssid, chpid.id, ret);
-		goto out_free;
+		put_device(&chp->dev);
+		goto out;
 	}
 	ret = sysfs_create_group(&chp->dev.kobj, &chp_attr_group);
 	if (ret) {
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 425e8f8..37aa611 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -37,29 +37,6 @@
 
 struct channel_path;
 
-struct css_general_char {
-	u64 : 12;
-	u32 dynio : 1;	 /* bit 12 */
-	u32 : 28;
-	u32 aif : 1;     /* bit 41 */
-	u32 : 3;
-	u32 mcss : 1;    /* bit 45 */
-	u32 fcs : 1;	 /* bit 46 */
-	u32 : 1;
-	u32 ext_mb : 1;  /* bit 48 */
-	u32 : 7;
-	u32 aif_tdd : 1; /* bit 56 */
-	u32 : 1;
-	u32 qebsm : 1;   /* bit 58 */
-	u32 : 8;
-	u32 aif_osa : 1; /* bit 67 */
-	u32 : 14;
-	u32 cib : 1;	 /* bit 82 */
-	u32 : 5;
-	u32 fcx : 1;	 /* bit 88 */
-	u32 : 7;
-}__attribute__((packed));
-
 struct css_chsc_char {
 	u64 res;
 	u64 : 20;
@@ -72,7 +49,6 @@
 	u32 : 19;
 }__attribute__((packed));
 
-extern struct css_general_char css_general_characteristics;
 extern struct css_chsc_char css_chsc_characteristics;
 
 struct chsc_ssd_info {
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 5ec7789..138124f 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -139,12 +139,11 @@
 	       __u8 lpm,		/* logical path mask */
 	       __u8 key)                /* storage key */
 {
-	char dbf_txt[15];
 	int ccode;
 	union orb *orb;
 
-	CIO_TRACE_EVENT(4, "stIO");
-	CIO_TRACE_EVENT(4, dev_name(&sch->dev));
+	CIO_TRACE_EVENT(5, "stIO");
+	CIO_TRACE_EVENT(5, dev_name(&sch->dev));
 
 	orb = &to_io_private(sch)->orb;
 	memset(orb, 0, sizeof(union orb));
@@ -169,8 +168,7 @@
 	ccode = ssch(sch->schid, orb);
 
 	/* process condition code */
-	sprintf(dbf_txt, "ccode:%d", ccode);
-	CIO_TRACE_EVENT(4, dbf_txt);
+	CIO_HEX_EVENT(5, &ccode, sizeof(ccode));
 
 	switch (ccode) {
 	case 0:
@@ -201,16 +199,14 @@
 int
 cio_resume (struct subchannel *sch)
 {
-	char dbf_txt[15];
 	int ccode;
 
-	CIO_TRACE_EVENT (4, "resIO");
+	CIO_TRACE_EVENT(4, "resIO");
 	CIO_TRACE_EVENT(4, dev_name(&sch->dev));
 
 	ccode = rsch (sch->schid);
 
-	sprintf (dbf_txt, "ccode:%d", ccode);
-	CIO_TRACE_EVENT (4, dbf_txt);
+	CIO_HEX_EVENT(4, &ccode, sizeof(ccode));
 
 	switch (ccode) {
 	case 0:
@@ -235,13 +231,12 @@
 int
 cio_halt(struct subchannel *sch)
 {
-	char dbf_txt[15];
 	int ccode;
 
 	if (!sch)
 		return -ENODEV;
 
-	CIO_TRACE_EVENT (2, "haltIO");
+	CIO_TRACE_EVENT(2, "haltIO");
 	CIO_TRACE_EVENT(2, dev_name(&sch->dev));
 
 	/*
@@ -249,8 +244,7 @@
 	 */
 	ccode = hsch (sch->schid);
 
-	sprintf (dbf_txt, "ccode:%d", ccode);
-	CIO_TRACE_EVENT (2, dbf_txt);
+	CIO_HEX_EVENT(2, &ccode, sizeof(ccode));
 
 	switch (ccode) {
 	case 0:
@@ -270,13 +264,12 @@
 int
 cio_clear(struct subchannel *sch)
 {
-	char dbf_txt[15];
 	int ccode;
 
 	if (!sch)
 		return -ENODEV;
 
-	CIO_TRACE_EVENT (2, "clearIO");
+	CIO_TRACE_EVENT(2, "clearIO");
 	CIO_TRACE_EVENT(2, dev_name(&sch->dev));
 
 	/*
@@ -284,8 +277,7 @@
 	 */
 	ccode = csch (sch->schid);
 
-	sprintf (dbf_txt, "ccode:%d", ccode);
-	CIO_TRACE_EVENT (2, dbf_txt);
+	CIO_HEX_EVENT(2, &ccode, sizeof(ccode));
 
 	switch (ccode) {
 	case 0:
@@ -306,19 +298,17 @@
 int
 cio_cancel (struct subchannel *sch)
 {
-	char dbf_txt[15];
 	int ccode;
 
 	if (!sch)
 		return -ENODEV;
 
-	CIO_TRACE_EVENT (2, "cancelIO");
+	CIO_TRACE_EVENT(2, "cancelIO");
 	CIO_TRACE_EVENT(2, dev_name(&sch->dev));
 
 	ccode = xsch (sch->schid);
 
-	sprintf (dbf_txt, "ccode:%d", ccode);
-	CIO_TRACE_EVENT (2, dbf_txt);
+	CIO_HEX_EVENT(2, &ccode, sizeof(ccode));
 
 	switch (ccode) {
 	case 0:		/* success */
@@ -429,11 +419,10 @@
  */
 int cio_enable_subchannel(struct subchannel *sch, u32 intparm)
 {
-	char dbf_txt[15];
 	int retry;
 	int ret;
 
-	CIO_TRACE_EVENT (2, "ensch");
+	CIO_TRACE_EVENT(2, "ensch");
 	CIO_TRACE_EVENT(2, dev_name(&sch->dev));
 
 	if (sch_is_pseudo_sch(sch))
@@ -460,8 +449,7 @@
 		} else
 			break;
 	}
-	sprintf (dbf_txt, "ret:%d", ret);
-	CIO_TRACE_EVENT (2, dbf_txt);
+	CIO_HEX_EVENT(2, &ret, sizeof(ret));
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cio_enable_subchannel);
@@ -472,11 +460,10 @@
  */
 int cio_disable_subchannel(struct subchannel *sch)
 {
-	char dbf_txt[15];
 	int retry;
 	int ret;
 
-	CIO_TRACE_EVENT (2, "dissch");
+	CIO_TRACE_EVENT(2, "dissch");
 	CIO_TRACE_EVENT(2, dev_name(&sch->dev));
 
 	if (sch_is_pseudo_sch(sch))
@@ -495,8 +482,7 @@
 		} else
 			break;
 	}
-	sprintf (dbf_txt, "ret:%d", ret);
-	CIO_TRACE_EVENT (2, dbf_txt);
+	CIO_HEX_EVENT(2, &ret, sizeof(ret));
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cio_disable_subchannel);
@@ -578,11 +564,6 @@
 			goto out;
 	}
 	mutex_init(&sch->reg_mutex);
-	/* Set a name for the subchannel */
-	if (cio_is_console(schid))
-		sch->dev.init_name = cio_get_console_sch_name(schid);
-	else
-		dev_set_name(&sch->dev, "0.%x.%04x", schid.ssid, schid.sch_no);
 
 	/*
 	 * The first subchannel that is not-operational (ccode==3)
@@ -686,7 +667,6 @@
 
 #ifdef CONFIG_CCW_CONSOLE
 static struct subchannel console_subchannel;
-static char console_sch_name[10] = "0.x.xxxx";
 static struct io_subchannel_private console_priv;
 static int console_subchannel_in_use;
 
@@ -873,12 +853,6 @@
 	return &console_subchannel;
 }
 
-const char *cio_get_console_sch_name(struct subchannel_id schid)
-{
-	snprintf(console_sch_name, 10, "0.%x.%04x", schid.ssid, schid.sch_no);
-	return (const char *)console_sch_name;
-}
-
 #endif
 static int
 __disable_subchannel_easy(struct subchannel_id schid, struct schib *schib)
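
Two things happen in the cio.c hunks: the hot-path trace points (start I/O,
interrupt) move to higher, less verbose debug levels, and the old pattern of
formatting a condition code into a 15-byte stack buffer is replaced by a hex
dump of the raw value, which drops a sprintf() from the I/O path. The
before/after, condensed from the hunks above:

	/* before: format the value, then trace it as a string */
	char dbf_txt[15];
	sprintf(dbf_txt, "ccode:%d", ccode);
	CIO_TRACE_EVENT(4, dbf_txt);

	/* after: log the raw bytes of the value directly */
	CIO_HEX_EVENT(4, &ccode, sizeof(ccode));
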
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 5150fba..2e43558 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -133,15 +133,11 @@
 extern struct subchannel *cio_get_console_subchannel(void);
 extern spinlock_t * cio_get_console_lock(void);
 extern void *cio_get_console_priv(void);
-extern const char *cio_get_console_sch_name(struct subchannel_id schid);
-extern const char *cio_get_console_cdev_name(struct subchannel *sch);
 #else
 #define cio_is_console(schid) 0
 #define cio_get_console_subchannel() NULL
 #define cio_get_console_lock() NULL
 #define cio_get_console_priv() NULL
-#define cio_get_console_sch_name(schid) NULL
-#define cio_get_console_cdev_name(sch) NULL
 #endif
 
 #endif
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 85d43c6..e995123 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -152,24 +152,15 @@
 }
 
 static void
-css_free_subchannel(struct subchannel *sch)
-{
-	if (sch) {
-		/* Reset intparm to zeroes. */
-		sch->config.intparm = 0;
-		cio_commit_config(sch);
-		kfree(sch->lock);
-		kfree(sch);
-	}
-}
-
-static void
 css_subchannel_release(struct device *dev)
 {
 	struct subchannel *sch;
 
 	sch = to_subchannel(dev);
 	if (!cio_is_console(sch->schid)) {
+		/* Reset intparm to zeroes. */
+		sch->config.intparm = 0;
+		cio_commit_config(sch);
 		kfree(sch->lock);
 		kfree(sch);
 	}
@@ -180,6 +171,8 @@
 	int ret;
 
 	mutex_lock(&sch->reg_mutex);
+	dev_set_name(&sch->dev, "0.%x.%04x", sch->schid.ssid,
+		     sch->schid.sch_no);
 	ret = device_register(&sch->dev);
 	mutex_unlock(&sch->reg_mutex);
 	return ret;
@@ -327,7 +320,7 @@
 		return PTR_ERR(sch);
 	ret = css_register_subchannel(sch);
 	if (ret)
-		css_free_subchannel(sch);
+		put_device(&sch->dev);
 	return ret;
 }
 
@@ -644,7 +637,10 @@
 	 * not working) so we do it now. This is true e.g. for the
 	 * console subchannel.
 	 */
-	css_register_subchannel(sch);
+	if (css_register_subchannel(sch)) {
+		if (!cio_is_console(schid))
+			put_device(&sch->dev);
+	}
 	return 0;
 }
 
@@ -661,8 +657,8 @@
 		css->global_pgid.pgid_high.cpu_addr = 0;
 #endif
 	}
-	css->global_pgid.cpu_id = ((cpuid_t *) __LC_CPUID)->ident;
-	css->global_pgid.cpu_model = ((cpuid_t *) __LC_CPUID)->machine;
+	css->global_pgid.cpu_id = S390_lowcore.cpu_id.ident;
+	css->global_pgid.cpu_model = S390_lowcore.cpu_id.machine;
 	css->global_pgid.tod_high = tod_high;
 
 }
@@ -920,8 +916,10 @@
 				goto out_device;
 		}
 		ret = device_register(&css->pseudo_subchannel->dev);
-		if (ret)
+		if (ret) {
+			put_device(&css->pseudo_subchannel->dev);
 			goto out_file;
+		}
 	}
 	ret = register_reboot_notifier(&css_reboot_notifier);
 	if (ret)
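
With css_free_subchannel() folded into the release callback, every error
path in css.c now converges on put_device(): the final reference drop runs
css_subchannel_release(), which resets intparm and frees the lock and the
subchannel itself. Sketched for the non-console case:

	sch = css_alloc_subchannel(schid);	/* one reference held */
	if (IS_ERR(sch))
		return PTR_ERR(sch);
	ret = css_register_subchannel(sch);
	if (ret)
		put_device(&sch->dev);	/* last ref: release() resets
					 * intparm and frees sch */

Naming the device in css_register_subchannel() rather than at allocation
time also lets the console subchannel lose its static name buffer.
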
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index d593bc7..0f95405 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -307,8 +307,11 @@
 
 static void ccw_device_unregister(struct ccw_device *cdev)
 {
-	if (test_and_clear_bit(1, &cdev->private->registered))
+	if (test_and_clear_bit(1, &cdev->private->registered)) {
 		device_del(&cdev->dev);
+		/* Release reference from device_initialize(). */
+		put_device(&cdev->dev);
+	}
 }
 
 static void ccw_device_remove_orphan_cb(struct work_struct *work)
@@ -319,7 +322,6 @@
 	priv = container_of(work, struct ccw_device_private, kick_work);
 	cdev = priv->cdev;
 	ccw_device_unregister(cdev);
-	put_device(&cdev->dev);
 	/* Release cdev reference for workqueue processing. */
 	put_device(&cdev->dev);
 }
@@ -333,15 +335,15 @@
 	 * Forced offline in disconnected state means
 	 * 'throw away device'.
 	 */
-	/* Get cdev reference for workqueue processing. */
-	if (!get_device(&cdev->dev))
-		return;
 	if (ccw_device_is_orphan(cdev)) {
 		/*
 		 * Deregister ccw device.
 		 * Unfortunately, we cannot do this directly from the
 		 * attribute method.
 		 */
+		/* Get cdev reference for workqueue processing. */
+		if (!get_device(&cdev->dev))
+			return;
 		spin_lock_irqsave(cdev->ccwlock, flags);
 		cdev->private->state = DEV_STATE_NOT_OPER;
 		spin_unlock_irqrestore(cdev->ccwlock, flags);
@@ -380,30 +382,34 @@
 	}
 	cdev->online = 0;
 	spin_lock_irq(cdev->ccwlock);
-	ret = ccw_device_offline(cdev);
-	if (ret == -ENODEV) {
-		if (cdev->private->state != DEV_STATE_NOT_OPER) {
-			cdev->private->state = DEV_STATE_OFFLINE;
-			dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
-		}
+	/* Wait until a final state or DISCONNECTED is reached */
+	while (!dev_fsm_final_state(cdev) &&
+	       cdev->private->state != DEV_STATE_DISCONNECTED) {
 		spin_unlock_irq(cdev->ccwlock);
-		/* Give up reference from ccw_device_set_online(). */
-		put_device(&cdev->dev);
-		return ret;
+		wait_event(cdev->private->wait_q, (dev_fsm_final_state(cdev) ||
+			   cdev->private->state == DEV_STATE_DISCONNECTED));
+		spin_lock_irq(cdev->ccwlock);
 	}
+	ret = ccw_device_offline(cdev);
+	if (ret)
+		goto error;
 	spin_unlock_irq(cdev->ccwlock);
-	if (ret == 0) {
-		wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
-		/* Give up reference from ccw_device_set_online(). */
-		put_device(&cdev->dev);
-	} else {
-		CIO_MSG_EVENT(0, "ccw_device_offline returned %d, "
-			      "device 0.%x.%04x\n",
-			      ret, cdev->private->dev_id.ssid,
-			      cdev->private->dev_id.devno);
-		cdev->online = 1;
-	}
-	return ret;
+	wait_event(cdev->private->wait_q, (dev_fsm_final_state(cdev) ||
+		   cdev->private->state == DEV_STATE_DISCONNECTED));
+	/* Give up reference from ccw_device_set_online(). */
+	put_device(&cdev->dev);
+	return 0;
+
+error:
+	CIO_MSG_EVENT(0, "ccw_device_offline returned %d, device 0.%x.%04x\n",
+		      ret, cdev->private->dev_id.ssid,
+		      cdev->private->dev_id.devno);
+	cdev->private->state = DEV_STATE_OFFLINE;
+	dev_fsm_event(cdev, DEV_EVENT_NOTOPER);
+	spin_unlock_irq(cdev->ccwlock);
+	/* Give up reference from ccw_device_set_online(). */
+	put_device(&cdev->dev);
+	return -ENODEV;
 }
 
 /**
@@ -421,6 +427,7 @@
 int ccw_device_set_online(struct ccw_device *cdev)
 {
 	int ret;
+	int ret2;
 
 	if (!cdev)
 		return -ENODEV;
@@ -444,28 +451,53 @@
 		put_device(&cdev->dev);
 		return ret;
 	}
-	if (cdev->private->state != DEV_STATE_ONLINE) {
+	spin_lock_irq(cdev->ccwlock);
+	/* Check if online processing was successful */
+	if ((cdev->private->state != DEV_STATE_ONLINE) &&
+	    (cdev->private->state != DEV_STATE_W4SENSE)) {
+		spin_unlock_irq(cdev->ccwlock);
 		/* Give up online reference since onlining failed. */
 		put_device(&cdev->dev);
 		return -ENODEV;
 	}
-	if (!cdev->drv->set_online || cdev->drv->set_online(cdev) == 0) {
-		cdev->online = 1;
-		return 0;
-	}
-	spin_lock_irq(cdev->ccwlock);
-	ret = ccw_device_offline(cdev);
 	spin_unlock_irq(cdev->ccwlock);
-	if (ret == 0)
-		wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
-	else
-		CIO_MSG_EVENT(0, "ccw_device_offline returned %d, "
-			      "device 0.%x.%04x\n",
-			      ret, cdev->private->dev_id.ssid,
-			      cdev->private->dev_id.devno);
+	if (cdev->drv->set_online)
+		ret = cdev->drv->set_online(cdev);
+	if (ret)
+		goto rollback;
+	cdev->online = 1;
+	return 0;
+
+rollback:
+	spin_lock_irq(cdev->ccwlock);
+	/* Wait until a final state or DISCONNECTED is reached */
+	while (!dev_fsm_final_state(cdev) &&
+	       cdev->private->state != DEV_STATE_DISCONNECTED) {
+		spin_unlock_irq(cdev->ccwlock);
+		wait_event(cdev->private->wait_q, (dev_fsm_final_state(cdev) ||
+			   cdev->private->state == DEV_STATE_DISCONNECTED));
+		spin_lock_irq(cdev->ccwlock);
+	}
+	ret2 = ccw_device_offline(cdev);
+	if (ret2)
+		goto error;
+	spin_unlock_irq(cdev->ccwlock);
+	wait_event(cdev->private->wait_q, (dev_fsm_final_state(cdev) ||
+		   cdev->private->state == DEV_STATE_DISCONNECTED));
 	/* Give up online reference since onlining failed. */
 	put_device(&cdev->dev);
-	return (ret == 0) ? -ENODEV : ret;
+	return ret;
+
+error:
+	CIO_MSG_EVENT(0, "rollback ccw_device_offline returned %d, "
+		      "device 0.%x.%04x\n",
+		      ret2, cdev->private->dev_id.ssid,
+		      cdev->private->dev_id.devno);
+	cdev->private->state = DEV_STATE_OFFLINE;
+	spin_unlock_irq(cdev->ccwlock);
+	/* Give up online reference since onlining failed. */
+	put_device(&cdev->dev);
+	return ret;
 }
 
 static int online_store_handle_offline(struct ccw_device *cdev)
@@ -637,8 +669,12 @@
 	int ret;
 
 	dev->bus = &ccw_bus_type;
-
-	if ((ret = device_add(dev)))
+	ret = dev_set_name(&cdev->dev, "0.%x.%04x", cdev->private->dev_id.ssid,
+			   cdev->private->dev_id.devno);
+	if (ret)
+		return ret;
+	ret = device_add(dev);
+	if (ret)
 		return ret;
 
 	set_bit(1, &cdev->private->registered);
@@ -1024,9 +1060,6 @@
 		return;
 	sch = to_subchannel(cdev->dev.parent);
 	css_sch_device_unregister(sch);
-	/* Reset intparm to zeroes. */
-	sch->config.intparm = 0;
-	cio_commit_config(sch);
 	/* Release cdev reference for workqueue processing.*/
 	put_device(&cdev->dev);
 	/* Release subchannel reference for local processing. */
@@ -1035,6 +1068,9 @@
 
 void ccw_device_schedule_sch_unregister(struct ccw_device *cdev)
 {
+	/* Get cdev reference for workqueue processing. */
+	if (!get_device(&cdev->dev))
+		return;
 	PREPARE_WORK(&cdev->private->kick_work,
 		     ccw_device_call_sch_unregister);
 	queue_work(slow_path_wq, &cdev->private->kick_work);
@@ -1055,9 +1091,6 @@
 		/* Device did not respond in time. */
 	case DEV_STATE_NOT_OPER:
 		cdev->private->flags.recog_done = 1;
-		/* Remove device found not operational. */
-		if (!get_device(&cdev->dev))
-			break;
 		ccw_device_schedule_sch_unregister(cdev);
 		if (atomic_dec_and_test(&ccw_device_init_count))
 			wake_up(&ccw_device_init_wq);
@@ -1095,13 +1128,6 @@
 	init_waitqueue_head(&priv->wait_q);
 	init_timer(&priv->timer);
 
-	/* Set an initial name for the device. */
-	if (cio_is_console(sch->schid))
-		cdev->dev.init_name = cio_get_console_cdev_name(sch);
-	else
-		dev_set_name(&cdev->dev, "0.%x.%04x",
-			     sch->schid.ssid, sch->schib.pmcw.dev);
-
 	/* Increase counter of devices currently in recognition. */
 	atomic_inc(&ccw_device_init_count);
 
@@ -1171,8 +1197,8 @@
 
 	cdev = sch_get_cdev(sch);
 
-	CIO_TRACE_EVENT(3, "IRQ");
-	CIO_TRACE_EVENT(3, dev_name(&sch->dev));
+	CIO_TRACE_EVENT(6, "IRQ");
+	CIO_TRACE_EVENT(6, dev_name(&sch->dev));
 	if (cdev)
 		dev_fsm_event(cdev, DEV_EVENT_INTERRUPT);
 }
@@ -1210,9 +1236,6 @@
 
 	sch = container_of(work, struct subchannel, work);
 	css_sch_device_unregister(sch);
-	/* Reset intparm to zeroes. */
-	sch->config.intparm = 0;
-	cio_commit_config(sch);
 	put_device(&sch->dev);
 }
 
@@ -1334,7 +1357,6 @@
 	cdev->private->state = DEV_STATE_NOT_OPER;
 	spin_unlock_irqrestore(cdev->ccwlock, flags);
 	ccw_device_unregister(cdev);
-	put_device(&cdev->dev);
 	kfree(sch->private);
 	sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
 	return 0;
@@ -1571,8 +1593,6 @@
 	spin_unlock_irq(cdev->ccwlock);
 	if (!unreg)
 		goto out;
-	if (!get_device(&cdev->dev))
-		goto out;
 	CIO_MSG_EVENT(3, "ccw: purging 0.%x.%04x\n", priv->dev_id.ssid,
 		      priv->dev_id.devno);
 	ccw_device_schedule_sch_unregister(cdev);
@@ -1688,10 +1708,6 @@
 		spin_unlock_irqrestore(sch->lock, flags);
 		css_sch_device_unregister(sch);
 		spin_lock_irqsave(sch->lock, flags);
-
-		/* Reset intparm to zeroes. */
-		sch->config.intparm = 0;
-		cio_commit_config(sch);
 		break;
 	case REPROBE:
 		ccw_device_trigger_reprobe(cdev);
@@ -1712,7 +1728,6 @@
 
 #ifdef CONFIG_CCW_CONSOLE
 static struct ccw_device console_cdev;
-static char console_cdev_name[10] = "0.x.xxxx";
 static struct ccw_device_private console_private;
 static int console_cdev_in_use;
 
@@ -1796,13 +1811,6 @@
 	return ccw_device_pm_restore(&console_cdev.dev);
 }
 EXPORT_SYMBOL_GPL(ccw_device_force_console);
-
-const char *cio_get_console_cdev_name(struct subchannel *sch)
-{
-	snprintf(console_cdev_name, 10, "0.%x.%04x",
-		 sch->schid.ssid, sch->schib.pmcw.dev);
-	return (const char *)console_cdev_name;
-}
 #endif
 
 /*
@@ -2020,7 +2028,9 @@
 	spin_unlock_irq(sch->lock);
 	if (ret) {
 		CIO_MSG_EVENT(0, "Couldn't start recognition for device "
-			      "%s (ret=%d)\n", dev_name(&cdev->dev), ret);
+			      "0.%x.%04x (ret=%d)\n",
+			      cdev->private->dev_id.ssid,
+			      cdev->private->dev_id.devno, ret);
 		spin_lock_irq(sch->lock);
 		cdev->private->state = DEV_STATE_DISCONNECTED;
 		spin_unlock_irq(sch->lock);
@@ -2083,8 +2093,9 @@
 	}
 	/* check if the device id has changed */
 	if (sch->schib.pmcw.dev != cdev->private->dev_id.devno) {
-		CIO_MSG_EVENT(0, "resume: sch %s: failed (devno changed from "
-			      "%04x to %04x)\n", dev_name(&sch->dev),
+		CIO_MSG_EVENT(0, "resume: sch 0.%x.%04x: failed (devno "
+			      "changed from %04x to %04x)\n",
+			      sch->schid.ssid, sch->schid.sch_no,
 			      cdev->private->dev_id.devno,
 			      sch->schib.pmcw.dev);
 		goto out_unreg_unlock;
@@ -2117,8 +2128,9 @@
 	if (cm_enabled) {
 		ret = ccw_set_cmf(cdev, 1);
 		if (ret) {
-			CIO_MSG_EVENT(2, "resume: cdev %s: cmf failed "
-				      "(rc=%d)\n", dev_name(&cdev->dev), ret);
+			CIO_MSG_EVENT(2, "resume: cdev 0.%x.%04x: cmf failed "
+				      "(rc=%d)\n", cdev->private->dev_id.ssid,
+				      cdev->private->dev_id.devno, ret);
 			ret = 0;
 		}
 	}
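
Both ccw_device_set_offline() and the new rollback path in
ccw_device_set_online() use the same idiom for waiting on the device FSM:
re-check the state under the lock, and drop the lock only for the actual
sleep. A generic sketch of the idiom, with hypothetical names:

	#include <linux/spinlock.h>
	#include <linux/wait.h>

	struct my_dev {				/* hypothetical device */
		spinlock_t lock;
		wait_queue_head_t wait_q;
		int state;
	};

	#define SETTLED(d)	((d)->state == 0)	/* placeholder */

	static void wait_until_settled(struct my_dev *dev)
	{
		spin_lock_irq(&dev->lock);
		while (!SETTLED(dev)) {
			spin_unlock_irq(&dev->lock);
			/* sleep without the lock held; every state change
			 * must be followed by a wake_up() on wait_q */
			wait_event(dev->wait_q, SETTLED(dev));
			spin_lock_irq(&dev->lock);
		}
		/* the state is settled and the lock is held again */
		spin_unlock_irq(&dev->lock);
	}

The matching wake_up() is the one added to the W4SENSE handler in
device_fsm.c further down ("In case sensing interfered with setting the
device online").
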
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 3db88c5..e728ce4 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -394,6 +394,13 @@
 			ccw_device_schedule_sch_unregister(cdev);
 		cdev->private->flags.donotify = 0;
 	}
+	if (state == DEV_STATE_NOT_OPER) {
+		CIO_MSG_EVENT(0, "Device %04x gone on subchannel %04x\n",
+			      cdev->private->dev_id.devno, sch->schid.sch_no);
+		if (!ccw_device_notify(cdev, CIO_GONE))
+			ccw_device_schedule_sch_unregister(cdev);
+		cdev->private->flags.donotify = 0;
+	}
 
 	if (cdev->private->flags.donotify) {
 		cdev->private->flags.donotify = 0;
@@ -731,6 +738,17 @@
 }
 
 /*
+ * Handle path verification event in offline state.
+ */
+static void ccw_device_offline_verify(struct ccw_device *cdev,
+				      enum dev_event dev_event)
+{
+	struct subchannel *sch = to_subchannel(cdev->dev.parent);
+
+	css_schedule_eval(sch->schid);
+}
+
+/*
  * Handle path verification event.
  */
 static void
@@ -887,6 +905,8 @@
 	}
 call_handler:
 	cdev->private->state = DEV_STATE_ONLINE;
+	/* In case sensing interfered with setting the device online */
+	wake_up(&cdev->private->wait_q);
 	/* Call the handler. */
 	if (ccw_device_call_handler(cdev) && cdev->private->flags.doverify)
 		/* Start delayed path verification. */
@@ -1149,7 +1169,7 @@
 		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
 		[DEV_EVENT_INTERRUPT]	= ccw_device_offline_irq,
 		[DEV_EVENT_TIMEOUT]	= ccw_device_nop,
-		[DEV_EVENT_VERIFY]	= ccw_device_nop,
+		[DEV_EVENT_VERIFY]	= ccw_device_offline_verify,
 	},
 	[DEV_STATE_VERIFY] = {
 		[DEV_EVENT_NOTOPER]	= ccw_device_generic_notoper,
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index b1241f8..ff7748a 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -1,7 +1,7 @@
 /*
  * linux/drivers/s390/cio/qdio.h
  *
- * Copyright 2000,2008 IBM Corp.
+ * Copyright 2000,2009 IBM Corp.
  * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
  *	      Jan Glauber <jang@linux.vnet.ibm.com>
  */
@@ -246,6 +246,7 @@
 	atomic_t nr_buf_used;
 
 	struct qdio_irq *irq_ptr;
+	struct dentry *debugfs_q;
 	struct tasklet_struct tasklet;
 
 	/* error condition during a data transfer */
@@ -267,6 +268,7 @@
 	struct qib qib;
 	u32 *dsci;		/* address of device state change indicator */
 	struct ccw_device *cdev;
+	struct dentry *debugfs_dev;
 
 	unsigned long int_parm;
 	struct subchannel_id schid;
diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c
index b8626d4..1b78f63 100644
--- a/drivers/s390/cio/qdio_debug.c
+++ b/drivers/s390/cio/qdio_debug.c
@@ -1,14 +1,12 @@
 /*
  *  drivers/s390/cio/qdio_debug.c
  *
- *  Copyright IBM Corp. 2008
+ *  Copyright IBM Corp. 2008,2009
  *
  *  Author: Jan Glauber (jang@linux.vnet.ibm.com)
  */
-#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
-#include <asm/qdio.h>
 #include <asm/debug.h>
 #include "qdio_debug.h"
 #include "qdio.h"
@@ -17,10 +15,7 @@
 debug_info_t *qdio_dbf_error;
 
 static struct dentry *debugfs_root;
-#define MAX_DEBUGFS_QUEUES	32
-static struct dentry *debugfs_queues[MAX_DEBUGFS_QUEUES] = { NULL };
-static DEFINE_MUTEX(debugfs_mutex);
-#define QDIO_DEBUGFS_NAME_LEN	40
+#define QDIO_DEBUGFS_NAME_LEN	10
 
 void qdio_allocate_dbf(struct qdio_initialize *init_data,
 		       struct qdio_irq *irq_ptr)
@@ -130,20 +125,6 @@
 			   filp->f_path.dentry->d_inode->i_private);
 }
 
-static void remove_debugfs_entry(struct qdio_q *q)
-{
-	int i;
-
-	for (i = 0; i < MAX_DEBUGFS_QUEUES; i++) {
-		if (!debugfs_queues[i])
-			continue;
-		if (debugfs_queues[i]->d_inode->i_private == q) {
-			debugfs_remove(debugfs_queues[i]);
-			debugfs_queues[i] = NULL;
-		}
-	}
-}
-
 static struct file_operations debugfs_fops = {
 	.owner	 = THIS_MODULE,
 	.open	 = qstat_seq_open,
@@ -155,22 +136,15 @@
 
 static void setup_debugfs_entry(struct qdio_q *q, struct ccw_device *cdev)
 {
-	int i = 0;
 	char name[QDIO_DEBUGFS_NAME_LEN];
 
-	while (debugfs_queues[i] != NULL) {
-		i++;
-		if (i >= MAX_DEBUGFS_QUEUES)
-			return;
-	}
-	snprintf(name, QDIO_DEBUGFS_NAME_LEN, "%s_%s_%d",
-		 dev_name(&cdev->dev),
+	snprintf(name, QDIO_DEBUGFS_NAME_LEN, "%s_%d",
 		 q->is_input_q ? "input" : "output",
 		 q->nr);
-	debugfs_queues[i] = debugfs_create_file(name, S_IFREG | S_IRUGO | S_IWUSR,
-						debugfs_root, q, &debugfs_fops);
-	if (IS_ERR(debugfs_queues[i]))
-		debugfs_queues[i] = NULL;
+	q->debugfs_q = debugfs_create_file(name, S_IFREG | S_IRUGO | S_IWUSR,
+				q->irq_ptr->debugfs_dev, q, &debugfs_fops);
+	if (IS_ERR(q->debugfs_q))
+		q->debugfs_q = NULL;
 }
 
 void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
@@ -178,12 +152,14 @@
 	struct qdio_q *q;
 	int i;
 
-	mutex_lock(&debugfs_mutex);
+	irq_ptr->debugfs_dev = debugfs_create_dir(dev_name(&cdev->dev),
+						  debugfs_root);
+	if (IS_ERR(irq_ptr->debugfs_dev))
+		irq_ptr->debugfs_dev = NULL;
 	for_each_input_queue(irq_ptr, q, i)
 		setup_debugfs_entry(q, cdev);
 	for_each_output_queue(irq_ptr, q, i)
 		setup_debugfs_entry(q, cdev);
-	mutex_unlock(&debugfs_mutex);
 }
 
 void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev)
@@ -191,17 +167,16 @@
 	struct qdio_q *q;
 	int i;
 
-	mutex_lock(&debugfs_mutex);
 	for_each_input_queue(irq_ptr, q, i)
-		remove_debugfs_entry(q);
+		debugfs_remove(q->debugfs_q);
 	for_each_output_queue(irq_ptr, q, i)
-		remove_debugfs_entry(q);
-	mutex_unlock(&debugfs_mutex);
+		debugfs_remove(q->debugfs_q);
+	debugfs_remove(irq_ptr->debugfs_dev);
 }
 
 int __init qdio_debug_init(void)
 {
-	debugfs_root = debugfs_create_dir("qdio_queues", NULL);
+	debugfs_root = debugfs_create_dir("qdio", NULL);
 
 	qdio_dbf_setup = debug_register("qdio_setup", 16, 1, 16);
 	debug_register_view(qdio_dbf_setup, &debug_hex_ascii_view);
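
The qdio debugfs rework replaces a global, mutex-protected array of 32
dentries with per-object ownership: each device gets its own directory and
each queue stores its own file dentry, so teardown is a plain
debugfs_remove() per object and needs no search or locking. The core of the
pattern, as used above:

	irq_ptr->debugfs_dev = debugfs_create_dir(dev_name(&cdev->dev),
						  debugfs_root);
	if (IS_ERR(irq_ptr->debugfs_dev))
		irq_ptr->debugfs_dev = NULL;	/* debugfs is best effort */
	...
	debugfs_remove(q->debugfs_q);		/* no-op when NULL */
	debugfs_remove(irq_ptr->debugfs_dev);	/* directory last */

Flattening IS_ERR() results to NULL is what makes the unconditional removal
safe, since debugfs_remove() ignores NULL but not error pointers. With the
device name moved into the directory, the per-queue file name shrinks to
"input_N"/"output_N", which is why QDIO_DEBUGFS_NAME_LEN can drop from 40
to 10.
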
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 0038750..9aef402 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -798,8 +798,10 @@
 
 	if (!qdio_inbound_q_done(q)) {
 		qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop);
-		if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
+		if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
 			tasklet_schedule(&q->tasklet);
+			return;
+		}
 	}
 
 	qdio_stop_polling(q);
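
The qdio_main.c fix adds an early return after rescheduling the tasklet:
previously the function queued itself for more work and then fell straight
through to qdio_stop_polling() anyway, even though buffers were still
pending. Condensed:

	if (!qdio_inbound_q_done(q)) {
		qdio_perf_stat_inc(&perf_stats.thinint_inbound_loop);
		if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
			tasklet_schedule(&q->tasklet);
			return;		/* keep polling: work remains */
		}
	}
	qdio_stop_polling(q);
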
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index ed3dcde..090b32a 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -648,7 +648,9 @@
 	/* Poll on the device until all requests are finished. */
 	do {
 		flags = 0;
+		spin_lock_bh(&ap_dev->lock);
 		__ap_poll_device(ap_dev, &flags);
+		spin_unlock_bh(&ap_dev->lock);
 	} while ((flags & 1) || (flags & 2));
 
 	ap_device_remove(dev);
@@ -1109,12 +1111,15 @@
 
 		ap_dev->device.bus = &ap_bus_type;
 		ap_dev->device.parent = ap_root_device;
-		dev_set_name(&ap_dev->device, "card%02x",
-			     AP_QID_DEVICE(ap_dev->qid));
+		if (dev_set_name(&ap_dev->device, "card%02x",
+				 AP_QID_DEVICE(ap_dev->qid))) {
+			kfree(ap_dev);
+			continue;
+		}
 		ap_dev->device.release = ap_device_release;
 		rc = device_register(&ap_dev->device);
 		if (rc) {
-			kfree(ap_dev);
+			put_device(&ap_dev->device);
 			continue;
 		}
 		/* Add device attributes. */
@@ -1407,14 +1412,12 @@
 
 static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
 {
-	spin_lock(&ap_dev->lock);
 	if (!ap_dev->unregistered) {
 		if (ap_poll_queue(ap_dev, flags))
 			ap_dev->unregistered = 1;
 		if (ap_dev->reset == AP_RESET_DO)
 			ap_reset(ap_dev);
 	}
-	spin_unlock(&ap_dev->lock);
 	return 0;
 }
 
@@ -1441,7 +1444,9 @@
 		flags = 0;
 		spin_lock(&ap_device_list_lock);
 		list_for_each_entry(ap_dev, &ap_device_list, list) {
+			spin_lock(&ap_dev->lock);
 			__ap_poll_device(ap_dev, &flags);
+			spin_unlock(&ap_dev->lock);
 		}
 		spin_unlock(&ap_device_list_lock);
 	} while (flags & 1);
@@ -1487,7 +1492,9 @@
 		flags = 0;
 		spin_lock_bh(&ap_device_list_lock);
 		list_for_each_entry(ap_dev, &ap_device_list, list) {
+			spin_lock(&ap_dev->lock);
 			__ap_poll_device(ap_dev, &flags);
+			spin_unlock(&ap_dev->lock);
 		}
 		spin_unlock_bh(&ap_device_list_lock);
 	}
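
In ap_bus.c the lock moves out of __ap_poll_device() and into its callers,
following the usual convention that a leading double underscore marks a
helper whose caller already holds the relevant lock. Note the two lock
flavours above: the poll loops take plain spin_lock(), while the remove
path's drain loop runs in process context and therefore uses
spin_lock_bh(). A sketch of the convention, with hypothetical names:

	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct mydev {				/* hypothetical */
		struct list_head list;
		spinlock_t lock;
		int unregistered;
	};

	static LIST_HEAD(dev_list);
	static DEFINE_SPINLOCK(dev_list_lock);

	/* double underscore: caller must hold dev->lock */
	static void __poll_device(struct mydev *dev, unsigned long *flags)
	{
		if (!dev->unregistered)
			*flags |= 1;		/* stand-in for poll work */
	}

	static void poll_all(void)
	{
		struct mydev *dev;
		unsigned long flags = 0;

		spin_lock(&dev_list_lock);
		list_for_each_entry(dev, &dev_list, list) {
			spin_lock(&dev->lock);
			__poll_device(dev, &flags);
			spin_unlock(&dev->lock);
		}
		spin_unlock(&dev_list_lock);
	}
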
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index e38e5d3..2930fc7 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -403,10 +403,14 @@
 	return len;
 }
 
-void __init s390_virtio_console_init(void)
+static int __init s390_virtio_console_init(void)
 {
-	virtio_cons_early_init(early_put_chars);
+	if (!MACHINE_IS_KVM)
+		return -ENODEV;
+	return virtio_cons_early_init(early_put_chars);
 }
+console_initcall(s390_virtio_console_init);
+
 
 /*
  * We do this after core stuff, but before the drivers.
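
Turning s390_virtio_console_init() from an externally-called setup hook into
a self-guarding console_initcall() removes the explicit call site from the
platform setup code: the initcall runs on every machine, and the
MACHINE_IS_KVM check turns it into a harmless -ENODEV elsewhere. The
general shape, with hypothetical names:

	#include <linux/init.h>

	static int __init my_console_init(void)
	{
		if (!on_my_platform())		/* hypothetical predicate */
			return -ENODEV;		/* no-op on other systems */
		return register_my_early_console();	/* hypothetical */
	}
	console_initcall(my_console_init);
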
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 8c36eaf..87dff11 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -1839,9 +1839,10 @@
 		return -ENOMEM;
 
 	ret = device_register(dev);
-
-	if (ret)
+	if (ret) {
+		put_device(dev);
 		return ret;
+	}
 	ret = netiucv_add_files(dev);
 	if (ret)
 		goto out_unreg;
@@ -2226,8 +2227,10 @@
 	netiucv_dev->release = (void (*)(struct device *))kfree;
 	netiucv_dev->driver = &netiucv_driver;
 	rc = device_register(netiucv_dev);
-	if (rc)
+	if (rc) {
+		put_device(netiucv_dev);
 		goto out_driver;
+	}
 	netiucv_banner();
 	return rc;
 
diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c
index e76a320..102000d 100644
--- a/drivers/s390/net/smsgiucv.c
+++ b/drivers/s390/net/smsgiucv.c
@@ -219,13 +219,13 @@
 	smsg_dev->driver = &smsg_driver;
 	rc = device_register(smsg_dev);
 	if (rc)
-		goto out_free_dev;
+		goto out_put;
 
 	cpcmd("SET SMSG IUCV", NULL, 0, NULL);
 	return 0;
 
-out_free_dev:
-	kfree(smsg_dev);
+out_put:
+	put_device(smsg_dev);
 out_free_path:
 	iucv_path_free(smsg_path);
 	smsg_path = NULL;
diff --git a/drivers/scsi/cxgb3i/cxgb3i_init.c b/drivers/scsi/cxgb3i/cxgb3i_init.c
index 042d9bc..d0ab23a 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_init.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_init.c
@@ -26,7 +26,7 @@
 
 static void open_s3_dev(struct t3cdev *);
 static void close_s3_dev(struct t3cdev *);
-static void s3_err_handler(struct t3cdev *tdev, u32 status, u32 error);
+static void s3_event_handler(struct t3cdev *tdev, u32 event, u32 port);
 
 static cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS];
 static struct cxgb3_client t3c_client = {
@@ -34,7 +34,7 @@
 	.handlers = cxgb3i_cpl_handlers,
 	.add = open_s3_dev,
 	.remove = close_s3_dev,
-	.err_handler = s3_err_handler,
+	.event_handler = s3_event_handler,
 };
 
 /**
@@ -66,16 +66,16 @@
 	cxgb3i_ddp_cleanup(t3dev);
 }
 
-static void s3_err_handler(struct t3cdev *tdev, u32 status, u32 error)
+static void s3_event_handler(struct t3cdev *tdev, u32 event, u32 port)
 {
 	struct cxgb3i_adapter *snic = cxgb3i_adapter_find_by_tdev(tdev);
 
-	cxgb3i_log_info("snic 0x%p, tdev 0x%p, status 0x%x, err 0x%x.\n",
-			snic, tdev, status, error);
+	cxgb3i_log_info("snic 0x%p, tdev 0x%p, event 0x%x, port 0x%x.\n",
+			snic, tdev, event, port);
 	if (!snic)
 		return;
 
-	switch (status) {
+	switch (event) {
 	case OFFLOAD_STATUS_DOWN:
 		snic->flags |= CXGB3I_ADAPTER_FLAG_RESET;
 		break;
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 9d7c993..640f65c 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1752,12 +1752,12 @@
 	mutex_lock(&dev->mutex);
 	if (dev->attached)
 		goto ok;
-	if (!capable(CAP_SYS_MODULE) && dev->in_request_module) {
+	if (!capable(CAP_NET_ADMIN) && dev->in_request_module) {
 		DPRINTK("in request module\n");
 		mutex_unlock(&dev->mutex);
 		return -ENODEV;
 	}
-	if (capable(CAP_SYS_MODULE) && dev->in_request_module)
+	if (capable(CAP_NET_ADMIN) && dev->in_request_module)
 		goto ok;
 
 	dev->in_request_module = 1;
@@ -1770,8 +1770,8 @@
 
 	dev->in_request_module = 0;
 
-	if (!dev->attached && !capable(CAP_SYS_MODULE)) {
-		DPRINTK("not attached and not CAP_SYS_MODULE\n");
+	if (!dev->attached && !capable(CAP_NET_ADMIN)) {
+		DPRINTK("not attached and not CAP_NET_ADMIN\n");
 		mutex_unlock(&dev->mutex);
 		return -ENODEV;
 	}
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 7b60579..e63c9be 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -1950,14 +1950,7 @@
  */
 static void pohmelfs_kill_super(struct super_block *sb)
 {
-	struct writeback_control wbc = {
-		.sync_mode	= WB_SYNC_ALL,
-		.range_start	= 0,
-		.range_end	= LLONG_MAX,
-		.nr_to_write	= LONG_MAX,
-	};
-	generic_sync_sb_inodes(sb, &wbc);
-
+	sync_inodes_sb(sb);
 	kill_anon_super(sb);
 }
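
The pohmelfs hunk is the filesystem-facing side of the writeback rework
further down: instead of hand-building a WB_SYNC_ALL writeback_control and
calling generic_sync_sb_inodes(), filesystems now use the sync_inodes_sb()
helper. Condensed:

	/* before: open-coded data-integrity sync of a superblock */
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_ALL,
		.range_start	= 0,
		.range_end	= LLONG_MAX,
		.nr_to_write	= LONG_MAX,
	};
	generic_sync_sb_inodes(sb, &wbc);

	/* after: one helper call with the same semantics */
	sync_inodes_sb(sb);
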
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b7c1603..7c1e65d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -501,22 +501,22 @@
 		}
 	}
 
-	/*
-	 * Now fill out the bss section.  First pad the last page up
-	 * to the page boundary, and then perform a mmap to make sure
-	 * that there are zero-mapped pages up to and including the 
-	 * last bss page.
-	 */
-	if (padzero(elf_bss)) {
-		error = -EFAULT;
-		goto out_close;
-	}
-
-	/* What we have mapped so far */
-	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
-
-	/* Map the last of the bss segment */
 	if (last_bss > elf_bss) {
+		/*
+		 * Now fill out the bss section.  First pad the last page up
+		 * to the page boundary, and then perform a mmap to make sure
+		 * that there are zero-mapped pages up to and including the
+		 * last bss page.
+		 */
+		if (padzero(elf_bss)) {
+			error = -EFAULT;
+			goto out_close;
+		}
+
+		/* What we have mapped so far */
+		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
+
+		/* Map the last of the bss segment */
 		down_write(&current->mm->mmap_sem);
 		error = do_brk(elf_bss, last_bss - elf_bss);
 		up_write(&current->mm->mmap_sem);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e83be2e..15831d5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1352,6 +1352,7 @@
 {
 	int err;
 
+	bdi->name = "btrfs";
 	bdi->capabilities = BDI_CAP_MAP_COPY;
 	err = bdi_init(bdi);
 	if (err)
diff --git a/fs/buffer.c b/fs/buffer.c
index 28f320f..90a9886 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -281,7 +281,7 @@
 	struct zone *zone;
 	int nid;
 
-	wakeup_pdflush(1024);
+	wakeup_flusher_threads(1024);
 	yield();
 
 	for_each_online_node(nid) {
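
wakeup_pdflush() becoming wakeup_flusher_threads() is the visible edge of
the same rework: the global pdflush thread pool is gone, and writeback is
handed to per-bdi flusher threads (see the fs/fs-writeback.c changes below,
which keep only the nr_pdflush_threads counter for /proc compatibility).
The call site is otherwise unchanged:

	/* before: poke the pdflush pool for up to 1024 pages */
	wakeup_pdflush(1024);

	/* after: same request, served by per-bdi flusher threads */
	wakeup_flusher_threads(1024);
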
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 2f18c1e..3cbc57f 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -31,6 +31,7 @@
  * - no readahead or I/O queue unplugging required
  */
 struct backing_dev_info directly_mappable_cdev_bdi = {
+	.name = "char",
 	.capabilities	= (
 #ifdef CONFIG_MMU
 		/* permit private copies of the data to be taken */
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4921e742..a2f7460 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -51,6 +51,7 @@
 };
 
 static struct backing_dev_info configfs_backing_dev_info = {
+	.name		= "configfs",
 	.ra_pages	= 0,	/* No readahead */
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d636e12..a63d442 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -230,7 +230,7 @@
 	return error;
 }
 
-static int
+int
 ext2_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
@@ -246,12 +246,6 @@
 	return -EAGAIN;
 }
 
-int
-ext2_permission(struct inode *inode, int mask)
-{
-	return generic_permission(inode, mask, ext2_check_acl);
-}
-
 /*
  * Initialize the ACLs of a new inode. Called from ext2_new_inode.
  *
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index ecefe47..3ff6cbb 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,13 +54,13 @@
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
 
 /* acl.c */
-extern int ext2_permission (struct inode *, int);
+extern int ext2_check_acl (struct inode *, int);
 extern int ext2_acl_chmod (struct inode *);
 extern int ext2_init_acl (struct inode *, struct inode *);
 
 #else
 #include <linux/sched.h>
-#define ext2_permission NULL
+#define ext2_check_acl	NULL
 #define ext2_get_acl	NULL
 #define ext2_set_acl	NULL
 
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2b9e47d..a2f3afd 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -85,6 +85,6 @@
 	.removexattr	= generic_removexattr,
 #endif
 	.setattr	= ext2_setattr,
-	.permission	= ext2_permission,
+	.check_acl	= ext2_check_acl,
 	.fiemap		= ext2_fiemap,
 };
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 78d9b92..23701f2 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -400,7 +400,7 @@
 	.removexattr	= generic_removexattr,
 #endif
 	.setattr	= ext2_setattr,
-	.permission	= ext2_permission,
+	.check_acl	= ext2_check_acl,
 };
 
 const struct inode_operations ext2_special_inode_operations = {
@@ -411,5 +411,5 @@
 	.removexattr	= generic_removexattr,
 #endif
 	.setattr	= ext2_setattr,
-	.permission	= ext2_permission,
+	.check_acl	= ext2_check_acl,
 };
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e167bae..c9b0df3 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -238,7 +238,7 @@
 	return error;
 }
 
-static int
+int
 ext3_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
@@ -254,12 +254,6 @@
 	return -EAGAIN;
 }
 
-int
-ext3_permission(struct inode *inode, int mask)
-{
-	return generic_permission(inode, mask, ext3_check_acl);
-}
-
 /*
  * Initialize the ACLs of a new inode. Called from ext3_new_inode.
  *
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 07d15a3..5973346 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
 
 /* acl.c */
-extern int ext3_permission (struct inode *, int);
+extern int ext3_check_acl (struct inode *, int);
 extern int ext3_acl_chmod (struct inode *);
 extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
 
 #else  /* CONFIG_EXT3_FS_POSIX_ACL */
 #include <linux/sched.h>
-#define ext3_permission NULL
+#define ext3_check_acl NULL
 
 static inline int
 ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5b49704..2992532 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -137,7 +137,7 @@
 	.listxattr	= ext3_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext3_permission,
+	.check_acl	= ext3_check_acl,
 	.fiemap		= ext3_fiemap,
 };
 
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 6ff7b97..aad6400 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2445,7 +2445,7 @@
 	.listxattr	= ext3_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext3_permission,
+	.check_acl	= ext3_check_acl,
 };
 
 const struct inode_operations ext3_special_inode_operations = {
@@ -2456,5 +2456,5 @@
 	.listxattr	= ext3_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext3_permission,
+	.check_acl	= ext3_check_acl,
 };
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index f6d8967..0df88b2 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -236,7 +236,7 @@
 	return error;
 }
 
-static int
+int
 ext4_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
@@ -252,12 +252,6 @@
 	return -EAGAIN;
 }
 
-int
-ext4_permission(struct inode *inode, int mask)
-{
-	return generic_permission(inode, mask, ext4_check_acl);
-}
-
 /*
  * Initialize the ACLs of a new inode. Called from ext4_new_inode.
  *
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 949789d..9d843d5 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,13 +54,13 @@
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 
 /* acl.c */
-extern int ext4_permission(struct inode *, int);
+extern int ext4_check_acl(struct inode *, int);
 extern int ext4_acl_chmod(struct inode *);
 extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
 
 #else  /* CONFIG_EXT4_FS_POSIX_ACL */
 #include <linux/sched.h>
-#define ext4_permission NULL
+#define ext4_check_acl NULL
 
 static inline int
 ext4_acl_chmod(struct inode *inode)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3f1873f..27f3c53 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -207,7 +207,7 @@
 	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext4_permission,
+	.check_acl	= ext4_check_acl,
 	.fallocate	= ext4_fallocate,
 	.fiemap		= ext4_fiemap,
 };
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index de04013..114abe5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2536,7 +2536,7 @@
 	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext4_permission,
+	.check_acl	= ext4_check_acl,
 	.fiemap         = ext4_fiemap,
 };
 
@@ -2548,5 +2548,5 @@
 	.listxattr	= ext4_listxattr,
 	.removexattr	= generic_removexattr,
 #endif
-	.permission	= ext4_permission,
+	.check_acl	= ext4_check_acl,
 };
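
Across ext2, ext3 and ext4 the change is the same: each filesystem's
->permission() was a one-line wrapper around generic_permission() with the
filesystem's ACL checker as the third argument. The new .check_acl inode
operation lets the VFS make that call itself, so the wrappers go away.
Roughly, relying on the VFS fallback this series assumes:

	/* removed: identical wrapper in ext2, ext3 and ext4 */
	int ext2_permission(struct inode *inode, int mask)
	{
		return generic_permission(inode, mask, ext2_check_acl);
	}

	/* kept: with no ->permission(), the VFS now does the
	 * equivalent of
	 *	generic_permission(inode, mask, inode->i_op->check_acl);
	 * and only consults the ACL when the mode bits don't decide */
	.check_acl	= ext2_check_acl,

The !CONFIG_*_FS_POSIX_ACL stubs simply define the check_acl hook as NULL,
so generic_permission() skips ACL evaluation entirely on such configs.
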
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c54226b..da86ef5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -19,49 +19,925 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include "internal.h"
 
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
 
-/**
- * writeback_acquire - attempt to get exclusive writeback access to a device
- * @bdi: the device's backing_dev_info structure
- *
- * It is a waste of resources to have more than one pdflush thread blocked on
- * a single request queue.  Exclusion at the request_queue level is obtained
- * via a flag in the request_queue's backing_dev_info.state.
- *
- * Non-request_queue-backed address_spaces will share default_backing_dev_info,
- * unless they implement their own.  Which is somewhat inefficient, as this
- * may prevent concurrent writeback against multiple devices.
+/*
+ * We don't actually have pdflush, but this one is exported through /proc...
  */
-static int writeback_acquire(struct backing_dev_info *bdi)
+int nr_pdflush_threads;
+
+/*
+ * Work items for the bdi_writeback threads
+ */
+struct bdi_work {
+	struct list_head list;
+	struct list_head wait_list;
+	struct rcu_head rcu_head;
+
+	unsigned long seen;
+	atomic_t pending;
+
+	struct super_block *sb;
+	unsigned long nr_pages;
+	enum writeback_sync_modes sync_mode;
+
+	unsigned long state;
+};
+
+enum {
+	WS_USED_B = 0,
+	WS_ONSTACK_B,
+};
+
+#define WS_USED (1 << WS_USED_B)
+#define WS_ONSTACK (1 << WS_ONSTACK_B)
+
+static inline bool bdi_work_on_stack(struct bdi_work *work)
 {
-	return !test_and_set_bit(BDI_pdflush, &bdi->state);
+	return test_bit(WS_ONSTACK_B, &work->state);
+}
+
+static inline void bdi_work_init(struct bdi_work *work,
+				 struct writeback_control *wbc)
+{
+	INIT_RCU_HEAD(&work->rcu_head);
+	work->sb = wbc->sb;
+	work->nr_pages = wbc->nr_to_write;
+	work->sync_mode = wbc->sync_mode;
+	work->state = WS_USED;
+}
+
+static inline void bdi_work_init_on_stack(struct bdi_work *work,
+					  struct writeback_control *wbc)
+{
+	bdi_work_init(work, wbc);
+	work->state |= WS_ONSTACK;
 }
 
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
  *
- * Determine whether there is writeback in progress against a backing device.
+ * Determine whether there is writeback waiting to be handled against a
+ * backing device.
  */
 int writeback_in_progress(struct backing_dev_info *bdi)
 {
-	return test_bit(BDI_pdflush, &bdi->state);
+	return !list_empty(&bdi->work_list);
 }
 
-/**
- * writeback_release - relinquish exclusive writeback access against a device.
- * @bdi: the device's backing_dev_info structure
- */
-static void writeback_release(struct backing_dev_info *bdi)
+static void bdi_work_clear(struct bdi_work *work)
 {
-	BUG_ON(!writeback_in_progress(bdi));
-	clear_bit(BDI_pdflush, &bdi->state);
+	clear_bit(WS_USED_B, &work->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&work->state, WS_USED_B);
+}
+
+static void bdi_work_free(struct rcu_head *head)
+{
+	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
+
+	if (!bdi_work_on_stack(work))
+		kfree(work);
+	else
+		bdi_work_clear(work);
+}
+
+static void wb_work_complete(struct bdi_work *work)
+{
+	const enum writeback_sync_modes sync_mode = work->sync_mode;
+
+	/*
+	 * For allocated work, we can clear the done/seen bit right here.
+	 * For on-stack work, we need to postpone both the clear and free
+	 * to after the RCU grace period, since the stack could be invalidated
+	 * as soon as bdi_work_clear() has done the wakeup.
+	 */
+	if (!bdi_work_on_stack(work))
+		bdi_work_clear(work);
+	if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work))
+		call_rcu(&work->rcu_head, bdi_work_free);
+}
+
+static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
+{
+	/*
+	 * The caller has retrieved the work arguments from this work,
+	 * drop our reference. If this is the last ref, delete and free it
+	 */
+	if (atomic_dec_and_test(&work->pending)) {
+		struct backing_dev_info *bdi = wb->bdi;
+
+		spin_lock(&bdi->wb_lock);
+		list_del_rcu(&work->list);
+		spin_unlock(&bdi->wb_lock);
+
+		wb_work_complete(work);
+	}
+}
+
+static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+{
+	if (work) {
+		work->seen = bdi->wb_mask;
+		BUG_ON(!work->seen);
+		atomic_set(&work->pending, bdi->wb_cnt);
+		BUG_ON(!bdi->wb_cnt);
+
+		/*
+		 * Make sure stores are seen before it appears on the list
+		 */
+		smp_mb();
+
+		spin_lock(&bdi->wb_lock);
+		list_add_tail_rcu(&work->list, &bdi->work_list);
+		spin_unlock(&bdi->wb_lock);
+	}
+
+	/*
+	 * If the default thread isn't there, make sure we add it. When
+	 * it gets created and wakes up, we'll run this work.
+	 */
+	if (unlikely(list_empty_careful(&bdi->wb_list)))
+		wake_up_process(default_backing_dev_info.wb.task);
+	else {
+		struct bdi_writeback *wb = &bdi->wb;
+
+		/*
+		 * If we failed allocating the bdi work item, wake up the wb
+		 * thread unconditionally. As a safety precaution, it'll
+		 * flush out everything.
+		 */
+		if (!wb_has_dirty_io(wb)) {
+			if (work)
+				wb_clear_pending(wb, work);
+		} else if (wb->task)
+			wake_up_process(wb->task);
+	}
+}
+
+/*
+ * Used for on-stack allocated work items. The caller needs to wait until
+ * the wb threads have acked the work before it's safe to continue.
+ */
+static void bdi_wait_on_work_clear(struct bdi_work *work)
+{
+	wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
+		    TASK_UNINTERRUPTIBLE);
+}
+
+static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc)
+{
+	struct bdi_work *work;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (work)
+		bdi_work_init(work, wbc);
+
+	return work;
+}
+
+void bdi_start_writeback(struct writeback_control *wbc)
+{
+	const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
+	struct bdi_work work_stack, *work = NULL;
+
+	if (!must_wait)
+		work = bdi_alloc_work(wbc);
+
+	if (!work) {
+		work = &work_stack;
+		bdi_work_init_on_stack(work, wbc);
+	}
+
+	bdi_queue_work(wbc->bdi, work);
+
+	/*
+	 * If the sync mode is WB_SYNC_ALL, block waiting for the work to
+	 * complete. If not, we only need to wait for the work to be started,
+	 * if we allocated it on-stack. We use the same mechanism, if the
+	 * wait bit is set in the bdi_work struct, then threads will not
+	 * clear pending until after they are done.
+	 *
+	 * Note that work == &work_stack if must_wait is true, so we don't
+	 * need to do call_rcu() here ever, since the completion path will
+	 * have done that for us.
+	 */
+	if (must_wait || work == &work_stack) {
+		bdi_wait_on_work_clear(work);
+		if (work != &work_stack)
+			call_rcu(&work->rcu_head, bdi_work_free);
+	}
+}
+
+/*
+ * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
+ * furthest end of its superblock's dirty-inode list.
+ *
+ * Before stamping the inode's ->dirtied_when, we check to see whether it is
+ * already the most-recently-dirtied inode on the b_dirty list.  If that is
+ * the case then the inode must have been redirtied while it was being written
+ * out and we don't reset its dirtied_when.
+ */
+static void redirty_tail(struct inode *inode)
+{
+	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+
+	if (!list_empty(&wb->b_dirty)) {
+		struct inode *tail;
+
+		tail = list_entry(wb->b_dirty.next, struct inode, i_list);
+		if (time_before(inode->dirtied_when, tail->dirtied_when))
+			inode->dirtied_when = jiffies;
+	}
+	list_move(&inode->i_list, &wb->b_dirty);
+}
+
+/*
+ * requeue inode for re-scanning after bdi->b_io list is exhausted.
+ */
+static void requeue_io(struct inode *inode)
+{
+	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+
+	list_move(&inode->i_list, &wb->b_more_io);
+}
+
+static void inode_sync_complete(struct inode *inode)
+{
+	/*
+	 * Prevent speculative execution through spin_unlock(&inode_lock);
+	 */
+	smp_mb();
+	wake_up_bit(&inode->i_state, __I_SYNC);
+}
+
+static bool inode_dirtied_after(struct inode *inode, unsigned long t)
+{
+	bool ret = time_after(inode->dirtied_when, t);
+#ifndef CONFIG_64BIT
+	/*
+	 * For inodes being constantly redirtied, dirtied_when can get stuck.
+	 * It _appears_ to be in the future, but is actually in the distant past.
+	 * This test is necessary to prevent such wrapped-around relative times
+	 * from permanently stopping the whole pdflush writeback.
+	 */
+	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
+#endif
+	return ret;
+}
+
+/*
+ * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
+ */
+static void move_expired_inodes(struct list_head *delaying_queue,
+			       struct list_head *dispatch_queue,
+				unsigned long *older_than_this)
+{
+	while (!list_empty(delaying_queue)) {
+		struct inode *inode = list_entry(delaying_queue->prev,
+						struct inode, i_list);
+		if (older_than_this &&
+		    inode_dirtied_after(inode, *older_than_this))
+			break;
+		list_move(&inode->i_list, dispatch_queue);
+	}
+}
+
+/*
+ * Queue all expired dirty inodes for io, eldest first.
+ */
+static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
+{
+	list_splice_init(&wb->b_more_io, wb->b_io.prev);
+	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
+}
+
+static int write_inode(struct inode *inode, int sync)
+{
+	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
+		return inode->i_sb->s_op->write_inode(inode, sync);
+	return 0;
+}
+
+/*
+ * Wait for writeback on an inode to complete.
+ */
+static void inode_wait_for_writeback(struct inode *inode)
+{
+	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
+	wait_queue_head_t *wqh;
+
+	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+	do {
+		spin_unlock(&inode_lock);
+		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+		spin_lock(&inode_lock);
+	} while (inode->i_state & I_SYNC);
+}
+
+/*
+ * Write out an inode's dirty pages.  Called under inode_lock.  Either the
+ * caller has ref on the inode (either via __iget or via syscall against an fd)
+ * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ *
+ * If `wait' is set, wait on the writeout.
+ *
+ * The whole writeout design is quite complex and fragile.  We want to avoid
+ * starvation of particular inodes when others are being redirtied, prevent
+ * livelocks, etc.
+ *
+ * Called under inode_lock.
+ */
+static int
+writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	struct address_space *mapping = inode->i_mapping;
+	int wait = wbc->sync_mode == WB_SYNC_ALL;
+	unsigned dirty;
+	int ret;
+
+	if (!atomic_read(&inode->i_count))
+		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+	else
+		WARN_ON(inode->i_state & I_WILL_FREE);
+
+	if (inode->i_state & I_SYNC) {
+		/*
+		 * If this inode is locked for writeback and we are not doing
+		 * writeback-for-data-integrity, move it to b_more_io so that
+		 * writeback can proceed with the other inodes on s_io.
+		 *
+		 * We'll have another go at writing back this inode when we
+		 * completed a full scan of b_io.
+		 */
+		if (!wait) {
+			requeue_io(inode);
+			return 0;
+		}
+
+		/*
+		 * It's a data-integrity sync.  We must wait.
+		 */
+		inode_wait_for_writeback(inode);
+	}
+
+	BUG_ON(inode->i_state & I_SYNC);
+
+	/* Set I_SYNC, reset I_DIRTY */
+	dirty = inode->i_state & I_DIRTY;
+	inode->i_state |= I_SYNC;
+	inode->i_state &= ~I_DIRTY;
+
+	spin_unlock(&inode_lock);
+
+	ret = do_writepages(mapping, wbc);
+
+	/* Don't write the inode if only I_DIRTY_PAGES was set */
+	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+		int err = write_inode(inode, wait);
+		if (ret == 0)
+			ret = err;
+	}
+
+	if (wait) {
+		int err = filemap_fdatawait(mapping);
+		if (ret == 0)
+			ret = err;
+	}
+
+	spin_lock(&inode_lock);
+	inode->i_state &= ~I_SYNC;
+	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
+		if (!(inode->i_state & I_DIRTY) &&
+		    mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+			/*
+			 * We didn't write back all the pages.  nfs_writepages()
+			 * sometimes bales out without doing anything. Redirty
+			 * the inode; Move it from b_io onto b_more_io/b_dirty.
+			 */
+			/*
+			 * akpm: if the caller was the kupdate function we put
+			 * this inode at the head of b_dirty so it gets first
+			 * consideration.  Otherwise, move it to the tail, for
+			 * the reasons described there.  I'm not really sure
+			 * how much sense this makes.  Presumably I had a good
+			 * reason for doing it this way, and I'd rather not
+			 * muck with it at present.
+			 */
+			if (wbc->for_kupdate) {
+				/*
+				 * For the kupdate function we move the inode
+				 * to b_more_io so it will get more writeout as
+				 * soon as the queue becomes uncongested.
+				 */
+				inode->i_state |= I_DIRTY_PAGES;
+				if (wbc->nr_to_write <= 0) {
+					/*
+					 * slice used up: queue for next turn
+					 */
+					requeue_io(inode);
+				} else {
+					/*
+					 * somehow blocked: retry later
+					 */
+					redirty_tail(inode);
+				}
+			} else {
+				/*
+				 * Otherwise fully redirty the inode so that
+				 * other inodes on this superblock will get some
+				 * writeout.  Otherwise heavy writing to one
+				 * file would indefinitely suspend writeout of
+				 * all the other files.
+				 */
+				inode->i_state |= I_DIRTY_PAGES;
+				redirty_tail(inode);
+			}
+		} else if (inode->i_state & I_DIRTY) {
+			/*
+			 * Someone redirtied the inode while we were writing back
+			 * the pages.
+			 */
+			redirty_tail(inode);
+		} else if (atomic_read(&inode->i_count)) {
+			/*
+			 * The inode is clean, in use
+			 */
+			list_move(&inode->i_list, &inode_in_use);
+		} else {
+			/*
+			 * The inode is clean, unused
+			 */
+			list_move(&inode->i_list, &inode_unused);
+		}
+	}
+	inode_sync_complete(inode);
+	return ret;
+}
+
+/*
+ * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * before calling writeback. So make sure that we do pin it, so it doesn't
+ * go away while we are writing inodes from it.
+ *
+ * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
+ * 1 if we failed.
+ */
+static int pin_sb_for_writeback(struct writeback_control *wbc,
+				   struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	/*
+	 * Caller must already hold the ref for this
+	 */
+	if (wbc->sync_mode == WB_SYNC_ALL) {
+		WARN_ON(!rwsem_is_locked(&sb->s_umount));
+		return 0;
+	}
+
+	spin_lock(&sb_lock);
+	sb->s_count++;
+	if (down_read_trylock(&sb->s_umount)) {
+		if (sb->s_root) {
+			spin_unlock(&sb_lock);
+			return 0;
+		}
+		/*
+		 * umounted, drop rwsem again and fall through to failure
+		 */
+		up_read(&sb->s_umount);
+	}
+
+	sb->s_count--;
+	spin_unlock(&sb_lock);
+	return 1;
+}
+
+static void unpin_sb_for_writeback(struct writeback_control *wbc,
+				   struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		return;
+
+	up_read(&sb->s_umount);
+	put_super(sb);
+}
+
+static void writeback_inodes_wb(struct bdi_writeback *wb,
+				struct writeback_control *wbc)
+{
+	struct super_block *sb = wbc->sb;
+	const int is_blkdev_sb = sb_is_blkdev_sb(sb);
+	const unsigned long start = jiffies;	/* livelock avoidance */
+
+	spin_lock(&inode_lock);
+
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
+
+	while (!list_empty(&wb->b_io)) {
+		struct inode *inode = list_entry(wb->b_io.prev,
+						struct inode, i_list);
+		long pages_skipped;
+
+		/*
+		 * super block given and doesn't match, skip this inode
+		 */
+		if (sb && sb != inode->i_sb) {
+			redirty_tail(inode);
+			continue;
+		}
+
+		if (!bdi_cap_writeback_dirty(wb->bdi)) {
+			redirty_tail(inode);
+			if (is_blkdev_sb) {
+				/*
+				 * Dirty memory-backed blockdev: the ramdisk
+				 * driver does this.  Skip just this inode
+				 */
+				continue;
+			}
+			/*
+			 * Dirty memory-backed inode against a filesystem other
+			 * than the kernel-internal bdev filesystem.  Skip the
+			 * entire superblock.
+			 */
+			break;
+		}
+
+		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
+			requeue_io(inode);
+			continue;
+		}
+
+		if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
+			wbc->encountered_congestion = 1;
+			if (!is_blkdev_sb)
+				break;		/* Skip a congested fs */
+			requeue_io(inode);
+			continue;		/* Skip a congested blockdev */
+		}
+
+		/*
+		 * Was this inode dirtied after sync_sb_inodes was called?
+		 * This keeps sync from doing extra work and avoids livelock.
+		 */
+		if (inode_dirtied_after(inode, start))
+			break;
+
+		if (pin_sb_for_writeback(wbc, inode)) {
+			requeue_io(inode);
+			continue;
+		}
+
+		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
+		__iget(inode);
+		pages_skipped = wbc->pages_skipped;
+		writeback_single_inode(inode, wbc);
+		unpin_sb_for_writeback(wbc, inode);
+		if (wbc->pages_skipped != pages_skipped) {
+			/*
+			 * writeback is not making progress due to locked
+			 * buffers.  Skip this inode for now.
+			 */
+			redirty_tail(inode);
+		}
+		spin_unlock(&inode_lock);
+		iput(inode);
+		cond_resched();
+		spin_lock(&inode_lock);
+		if (wbc->nr_to_write <= 0) {
+			wbc->more_io = 1;
+			break;
+		}
+		if (!list_empty(&wb->b_more_io))
+			wbc->more_io = 1;
+	}
+
+	spin_unlock(&inode_lock);
+	/* Leave any unwritten inodes on b_io */
+}
+
+void writeback_inodes_wbc(struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi = wbc->bdi;
+
+	writeback_inodes_wb(&bdi->wb, wbc);
+}
+
+/*
+ * The maximum number of pages to writeout in a single bdi flush/kupdate
+ * operation.  We do this so we don't hold I_SYNC against an inode for
+ * enormous amounts of time, which would block a userspace task which has
+ * been forced to throttle against that inode.  Also, the code reevaluates
+ * the dirty limits each time it has written this many pages.
+ */
+#define MAX_WRITEBACK_PAGES     1024
+
+static inline bool over_bground_thresh(void)
+{
+	unsigned long background_thresh, dirty_thresh;
+
+	get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
+
+	return (global_page_state(NR_FILE_DIRTY) +
+		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
+}
+
+/*
+ * Explicit flushing or periodic writeback of "old" data.
+ *
+ * Define "old": the first time one of an inode's pages is dirtied, we mark the
+ * dirtying-time in the inode's address_space.  So this periodic writeback code
+ * just walks the superblock inode list, writing back any inodes which are
+ * older than a specific point in time.
+ *
+ * Try to run once per dirty_writeback_interval.  But if a writeback event
+ * takes longer than a dirty_writeback_interval interval, then leave a
+ * one-second gap.
+ *
+ * older_than_this takes precedence over nr_to_write.  So we'll only write back
+ * all dirty pages if they are all attached to "old" mappings.
+ */
+static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
+			 struct super_block *sb,
+			 enum writeback_sync_modes sync_mode, int for_kupdate)
+{
+	struct writeback_control wbc = {
+		.bdi			= wb->bdi,
+		.sb			= sb,
+		.sync_mode		= sync_mode,
+		.older_than_this	= NULL,
+		.for_kupdate		= for_kupdate,
+		.range_cyclic		= 1,
+	};
+	unsigned long oldest_jif;
+	long wrote = 0;
+
+	if (wbc.for_kupdate) {
+		wbc.older_than_this = &oldest_jif;
+		oldest_jif = jiffies -
+				msecs_to_jiffies(dirty_expire_interval * 10);
+	}
+
+	for (;;) {
+		/*
+		 * Don't flush anything for non-integrity writeback where
+		 * no nr_pages was given
+		 */
+		if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE)
+			break;
+
+		/*
+		 * If no specific pages were given and this is just a
+		 * periodic background writeout and we are below the
+		 * background dirty threshold, don't do anything
+		 */
+		if (for_kupdate && nr_pages <= 0 && !over_bground_thresh())
+			break;
+
+		wbc.more_io = 0;
+		wbc.encountered_congestion = 0;
+		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+		wbc.pages_skipped = 0;
+		writeback_inodes_wb(wb, &wbc);
+		nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+
+		/*
+		 * If we ran out of stuff to write, bail unless more_io got set
+		 */
+		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
+			if (wbc.more_io && !wbc.for_kupdate)
+				continue;
+			break;
+		}
+	}
+
+	return wrote;
+}
+
+/*
+ * Return the next bdi_work struct that hasn't been processed by this
+ * wb thread yet
+ */
+static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
+					   struct bdi_writeback *wb)
+{
+	struct bdi_work *work, *ret = NULL;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(work, &bdi->work_list, list) {
+		if (!test_and_clear_bit(wb->nr, &work->seen))
+			continue;
+
+		ret = work;
+		break;
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static long wb_check_old_data_flush(struct bdi_writeback *wb)
+{
+	unsigned long expired;
+	long nr_pages;
+
+	expired = wb->last_old_flush +
+			msecs_to_jiffies(dirty_writeback_interval * 10);
+	if (time_before(jiffies, expired))
+		return 0;
+
+	wb->last_old_flush = jiffies;
+	nr_pages = global_page_state(NR_FILE_DIRTY) +
+			global_page_state(NR_UNSTABLE_NFS) +
+			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+	if (nr_pages)
+		return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1);
+
+	return 0;
+}
+
+/*
+ * Retrieve work items and do the writeback they describe
+ */
+long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
+{
+	struct backing_dev_info *bdi = wb->bdi;
+	struct bdi_work *work;
+	long nr_pages, wrote = 0;
+
+	while ((work = get_next_work_item(bdi, wb)) != NULL) {
+		enum writeback_sync_modes sync_mode;
+
+		nr_pages = work->nr_pages;
+
+		/*
+		 * Override sync mode, in case we must wait for completion
+		 */
+		if (force_wait)
+			work->sync_mode = sync_mode = WB_SYNC_ALL;
+		else
+			sync_mode = work->sync_mode;
+
+		/*
+		 * If this isn't a data integrity operation, just notify
+		 * that we have seen this work and we are now starting it.
+		 */
+		if (sync_mode == WB_SYNC_NONE)
+			wb_clear_pending(wb, work);
+
+		wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0);
+
+		/*
+		 * This is a data integrity writeback, so only do the
+		 * notification when we have completed the work.
+		 */
+		if (sync_mode == WB_SYNC_ALL)
+			wb_clear_pending(wb, work);
+	}
+
+	/*
+	 * Check for periodic writeback, kupdated() style
+	 */
+	wrote += wb_check_old_data_flush(wb);
+
+	return wrote;
+}
+
+/*
+ * Handle writeback of dirty data for the device backed by this bdi. Also
+ * wakes up periodically and does kupdated-style flushing.
+ */
+int bdi_writeback_task(struct bdi_writeback *wb)
+{
+	unsigned long last_active = jiffies;
+	unsigned long wait_jiffies = -1UL;
+	long pages_written;
+
+	while (!kthread_should_stop()) {
+		pages_written = wb_do_writeback(wb, 0);
+
+		if (pages_written)
+			last_active = jiffies;
+		else if (wait_jiffies != -1UL) {
+			unsigned long max_idle;
+
+			/*
+			 * Longest period of inactivity that we tolerate. If we
+			 * see dirty data again later, the task will get
+			 * recreated automatically.
+			 */
+			max_idle = max(5UL * 60 * HZ, wait_jiffies);
+			if (time_after(jiffies, max_idle + last_active))
+				break;
+		}
+
+		wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(wait_jiffies);
+		try_to_freeze();
+	}
+
+	return 0;
+}
+
+/*
+ * Schedule writeback for all backing devices. Expensive! If this is a data
+ * integrity operation, writeback will be complete when this returns. If
+ * we are simply called for WB_SYNC_NONE, then writeback will merely be
+ * scheduled to run.
+ */
+static void bdi_writeback_all(struct writeback_control *wbc)
+{
+	const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
+	struct backing_dev_info *bdi;
+	struct bdi_work *work;
+	LIST_HEAD(list);
+
+restart:
+	spin_lock(&bdi_lock);
+
+	list_for_each_entry(bdi, &bdi_list, bdi_list) {
+		struct bdi_work *work;
+
+		if (!bdi_has_dirty_io(bdi))
+			continue;
+
+		/*
+		 * If work allocation fails, do the writes inline. We drop
+		 * the lock and restart the list writeout. This should be OK,
+		 * since this happens rarely and the writeout should
+		 * eventually make more free memory available.
+		 */
+		work = bdi_alloc_work(wbc);
+		if (!work) {
+			struct writeback_control __wbc;
+
+			/*
+			 * Not a data integrity writeout, just continue
+			 */
+			if (!must_wait)
+				continue;
+
+			spin_unlock(&bdi_lock);
+			__wbc = *wbc;
+			__wbc.bdi = bdi;
+			writeback_inodes_wbc(&__wbc);
+			goto restart;
+		}
+		if (must_wait)
+			list_add_tail(&work->wait_list, &list);
+
+		bdi_queue_work(bdi, work);
+	}
+
+	spin_unlock(&bdi_lock);
+
+	/*
+	 * If this is for WB_SYNC_ALL, wait for pending work to complete
+	 * before returning.
+	 */
+	while (!list_empty(&list)) {
+		work = list_entry(list.next, struct bdi_work, wait_list);
+		list_del(&work->wait_list);
+		bdi_wait_on_work_clear(work);
+		call_rcu(&work->rcu_head, bdi_work_free);
+	}
+}
+
+/*
+ * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
+ * the whole world.
+ */
+void wakeup_flusher_threads(long nr_pages)
+{
+	struct writeback_control wbc = {
+		.sync_mode	= WB_SYNC_NONE,
+		.older_than_this = NULL,
+		.range_cyclic	= 1,
+	};
+
+	if (nr_pages == 0)
+		nr_pages = global_page_state(NR_FILE_DIRTY) +
+				global_page_state(NR_UNSTABLE_NFS);
+	wbc.nr_to_write = nr_pages;
+	bdi_writeback_all(&wbc);
 }
 
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
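
The hunks above are the core of the per-bdi flusher model that replaces
pdflush: callers package a writeback request as a bdi_work item and queue
it, and each bdi's thread picks items up via get_next_work_item() and runs
wb_writeback().  Note the acknowledgement ordering in wb_do_writeback():
WB_SYNC_NONE work is cleared before the writeback runs, since nobody waits
on it, while WB_SYNC_ALL work is cleared only once it completes.  An
illustrative caller-side sketch, mirroring the allocation fallback used in
bdi_writeback_all() above (example_kick_bdi() itself is hypothetical, and
assumes wbc->bdi has been filled in):

	static void example_kick_bdi(struct writeback_control *wbc)
	{
		struct bdi_work *work = bdi_alloc_work(wbc);

		if (work) {
			bdi_queue_work(wbc->bdi, work);	/* wakes wb task */
			return;
		}
		/* Allocation failed: do the writeout inline instead */
		writeback_inodes_wbc(wbc);
	}
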
@@ -165,274 +1041,29 @@
 			goto out;
 
 		/*
-		 * If the inode was already on s_dirty/s_io/s_more_io, don't
-		 * reposition it (that would break s_dirty time-ordering).
+		 * If the inode was already on b_dirty/b_io/b_more_io, don't
+		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
+			struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
+			struct backing_dev_info *bdi = wb->bdi;
+
+			if (bdi_cap_writeback_dirty(bdi) &&
+			    !test_bit(BDI_registered, &bdi->state)) {
+				WARN_ON(1);
+				printk(KERN_ERR "bdi-%s not registered\n",
+								bdi->name);
+			}
+
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &sb->s_dirty);
+			list_move(&inode->i_list, &wb->b_dirty);
 		}
 	}
 out:
 	spin_unlock(&inode_lock);
 }
-
 EXPORT_SYMBOL(__mark_inode_dirty);
 
-static int write_inode(struct inode *inode, int sync)
-{
-	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
-		return inode->i_sb->s_op->write_inode(inode, sync);
-	return 0;
-}
-
-/*
- * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
- * furthest end of its superblock's dirty-inode list.
- *
- * Before stamping the inode's ->dirtied_when, we check to see whether it is
- * already the most-recently-dirtied inode on the s_dirty list.  If that is
- * the case then the inode must have been redirtied while it was being written
- * out and we don't reset its dirtied_when.
- */
-static void redirty_tail(struct inode *inode)
-{
-	struct super_block *sb = inode->i_sb;
-
-	if (!list_empty(&sb->s_dirty)) {
-		struct inode *tail_inode;
-
-		tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list);
-		if (time_before(inode->dirtied_when,
-				tail_inode->dirtied_when))
-			inode->dirtied_when = jiffies;
-	}
-	list_move(&inode->i_list, &sb->s_dirty);
-}
-
-/*
- * requeue inode for re-scanning after sb->s_io list is exhausted.
- */
-static void requeue_io(struct inode *inode)
-{
-	list_move(&inode->i_list, &inode->i_sb->s_more_io);
-}
-
-static void inode_sync_complete(struct inode *inode)
-{
-	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
-	 */
-	smp_mb();
-	wake_up_bit(&inode->i_state, __I_SYNC);
-}
-
-static bool inode_dirtied_after(struct inode *inode, unsigned long t)
-{
-	bool ret = time_after(inode->dirtied_when, t);
-#ifndef CONFIG_64BIT
-	/*
-	 * For inodes being constantly redirtied, dirtied_when can get stuck.
-	 * It _appears_ to be in the future, but is actually in distant past.
-	 * This test is necessary to prevent such wrapped-around relative times
-	 * from permanently stopping the whole pdflush writeback.
-	 */
-	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
-#endif
-	return ret;
-}
-
-/*
- * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
- */
-static void move_expired_inodes(struct list_head *delaying_queue,
-			       struct list_head *dispatch_queue,
-				unsigned long *older_than_this)
-{
-	while (!list_empty(delaying_queue)) {
-		struct inode *inode = list_entry(delaying_queue->prev,
-						struct inode, i_list);
-		if (older_than_this &&
-		    inode_dirtied_after(inode, *older_than_this))
-			break;
-		list_move(&inode->i_list, dispatch_queue);
-	}
-}
-
-/*
- * Queue all expired dirty inodes for io, eldest first.
- */
-static void queue_io(struct super_block *sb,
-				unsigned long *older_than_this)
-{
-	list_splice_init(&sb->s_more_io, sb->s_io.prev);
-	move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
-}
-
-int sb_has_dirty_inodes(struct super_block *sb)
-{
-	return !list_empty(&sb->s_dirty) ||
-	       !list_empty(&sb->s_io) ||
-	       !list_empty(&sb->s_more_io);
-}
-EXPORT_SYMBOL(sb_has_dirty_inodes);
-
-/*
- * Wait for writeback on an inode to complete.
- */
-static void inode_wait_for_writeback(struct inode *inode)
-{
-	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
-	wait_queue_head_t *wqh;
-
-	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-	do {
-		spin_unlock(&inode_lock);
-		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-		spin_lock(&inode_lock);
-	} while (inode->i_state & I_SYNC);
-}
-
-/*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
- *
- * If `wait' is set, wait on the writeout.
- *
- * The whole writeout design is quite complex and fragile.  We want to avoid
- * starvation of particular inodes when others are being redirtied, prevent
- * livelocks, etc.
- *
- * Called under inode_lock.
- */
-static int
-writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	struct address_space *mapping = inode->i_mapping;
-	int wait = wbc->sync_mode == WB_SYNC_ALL;
-	unsigned dirty;
-	int ret;
-
-	if (!atomic_read(&inode->i_count))
-		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
-	else
-		WARN_ON(inode->i_state & I_WILL_FREE);
-
-	if (inode->i_state & I_SYNC) {
-		/*
-		 * If this inode is locked for writeback and we are not doing
-		 * writeback-for-data-integrity, move it to s_more_io so that
-		 * writeback can proceed with the other inodes on s_io.
-		 *
-		 * We'll have another go at writing back this inode when we
-		 * completed a full scan of s_io.
-		 */
-		if (!wait) {
-			requeue_io(inode);
-			return 0;
-		}
-
-		/*
-		 * It's a data-integrity sync.  We must wait.
-		 */
-		inode_wait_for_writeback(inode);
-	}
-
-	BUG_ON(inode->i_state & I_SYNC);
-
-	/* Set I_SYNC, reset I_DIRTY */
-	dirty = inode->i_state & I_DIRTY;
-	inode->i_state |= I_SYNC;
-	inode->i_state &= ~I_DIRTY;
-
-	spin_unlock(&inode_lock);
-
-	ret = do_writepages(mapping, wbc);
-
-	/* Don't write the inode if only I_DIRTY_PAGES was set */
-	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
-		int err = write_inode(inode, wait);
-		if (ret == 0)
-			ret = err;
-	}
-
-	if (wait) {
-		int err = filemap_fdatawait(mapping);
-		if (ret == 0)
-			ret = err;
-	}
-
-	spin_lock(&inode_lock);
-	inode->i_state &= ~I_SYNC;
-	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
-		if (!(inode->i_state & I_DIRTY) &&
-		    mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-			/*
-			 * We didn't write back all the pages.  nfs_writepages()
-			 * sometimes bales out without doing anything. Redirty
-			 * the inode; Move it from s_io onto s_more_io/s_dirty.
-			 */
-			/*
-			 * akpm: if the caller was the kupdate function we put
-			 * this inode at the head of s_dirty so it gets first
-			 * consideration.  Otherwise, move it to the tail, for
-			 * the reasons described there.  I'm not really sure
-			 * how much sense this makes.  Presumably I had a good
-			 * reasons for doing it this way, and I'd rather not
-			 * muck with it at present.
-			 */
-			if (wbc->for_kupdate) {
-				/*
-				 * For the kupdate function we move the inode
-				 * to s_more_io so it will get more writeout as
-				 * soon as the queue becomes uncongested.
-				 */
-				inode->i_state |= I_DIRTY_PAGES;
-				if (wbc->nr_to_write <= 0) {
-					/*
-					 * slice used up: queue for next turn
-					 */
-					requeue_io(inode);
-				} else {
-					/*
-					 * somehow blocked: retry later
-					 */
-					redirty_tail(inode);
-				}
-			} else {
-				/*
-				 * Otherwise fully redirty the inode so that
-				 * other inodes on this superblock will get some
-				 * writeout.  Otherwise heavy writing to one
-				 * file would indefinitely suspend writeout of
-				 * all the other files.
-				 */
-				inode->i_state |= I_DIRTY_PAGES;
-				redirty_tail(inode);
-			}
-		} else if (inode->i_state & I_DIRTY) {
-			/*
-			 * Someone redirtied the inode while were writing back
-			 * the pages.
-			 */
-			redirty_tail(inode);
-		} else if (atomic_read(&inode->i_count)) {
-			/*
-			 * The inode is clean, inuse
-			 */
-			list_move(&inode->i_list, &inode_in_use);
-		} else {
-			/*
-			 * The inode is clean, unused
-			 */
-			list_move(&inode->i_list, &inode_unused);
-		}
-	}
-	inode_sync_complete(inode);
-	return ret;
-}
-
 /*
  * Write out a superblock's list of dirty inodes.  A wait will be performed
  * upon no inodes, all inodes or the final one, depending upon sync_mode.
@@ -440,7 +1071,7 @@
  * If older_than_this is non-NULL, then only write out inodes which
  * had their first dirtying at a time earlier than *older_than_this.
  *
- * If we're a pdflush thread, then implement pdflush collision avoidance
+ * If we're a pdflush thread, then implement pdflush collision avoidance
  * against the entire list.
  *
  * If `bdi' is non-zero then we're being asked to writeback a specific queue.
@@ -448,240 +1079,114 @@
  * a variety of queues, so all inodes are searched.  For other superblocks,
  * assume that all inodes are backed by the same queue.
  *
- * FIXME: this linear search could get expensive with many fileystems.  But
- * how to fix?  We need to go from an address_space to all inodes which share
- * a queue with that address_space.  (Easy: have a global "dirty superblocks"
- * list).
- *
- * The inodes to be written are parked on sb->s_io.  They are moved back onto
- * sb->s_dirty as they are selected for writing.  This way, none can be missed
+ * The inodes to be written are parked on bdi->b_io.  They are moved back onto
+ * bdi->b_dirty as they are selected for writing.  This way, none can be missed
  * on the writer throttling path, and we get decent balancing between many
  * throttled threads: we don't want them all piling up on inode_sync_wait.
  */
-void generic_sync_sb_inodes(struct super_block *sb,
-				struct writeback_control *wbc)
+static void wait_sb_inodes(struct writeback_control *wbc)
 {
-	const unsigned long start = jiffies;	/* livelock avoidance */
-	int sync = wbc->sync_mode == WB_SYNC_ALL;
+	struct inode *inode, *old_inode = NULL;
+
+	/*
+	 * We need to be protected against the filesystem going from
+	 * r/o to r/w or vice versa.
+	 */
+	WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
 
 	spin_lock(&inode_lock);
-	if (!wbc->for_kupdate || list_empty(&sb->s_io))
-		queue_io(sb, wbc->older_than_this);
 
-	while (!list_empty(&sb->s_io)) {
-		struct inode *inode = list_entry(sb->s_io.prev,
-						struct inode, i_list);
-		struct address_space *mapping = inode->i_mapping;
-		struct backing_dev_info *bdi = mapping->backing_dev_info;
-		long pages_skipped;
+	/*
+	 * Data integrity sync. Must wait for all pages under writeback,
+	 * because pages may have been dirtied before our sync call
+	 * whose writeout had already started; in that case the inode
+	 * may not be on the dirty list, but we still have to wait
+	 * for that writeout.
+	 */
+	list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
+		struct address_space *mapping;
 
-		if (!bdi_cap_writeback_dirty(bdi)) {
-			redirty_tail(inode);
-			if (sb_is_blkdev_sb(sb)) {
-				/*
-				 * Dirty memory-backed blockdev: the ramdisk
-				 * driver does this.  Skip just this inode
-				 */
-				continue;
-			}
-			/*
-			 * Dirty memory-backed inode against a filesystem other
-			 * than the kernel-internal bdev filesystem.  Skip the
-			 * entire superblock.
-			 */
-			break;
-		}
-
-		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
-			requeue_io(inode);
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
 			continue;
-		}
-
-		if (wbc->nonblocking && bdi_write_congested(bdi)) {
-			wbc->encountered_congestion = 1;
-			if (!sb_is_blkdev_sb(sb))
-				break;		/* Skip a congested fs */
-			requeue_io(inode);
-			continue;		/* Skip a congested blockdev */
-		}
-
-		if (wbc->bdi && bdi != wbc->bdi) {
-			if (!sb_is_blkdev_sb(sb))
-				break;		/* fs has the wrong queue */
-			requeue_io(inode);
-			continue;		/* blockdev has wrong queue */
-		}
-
-		/*
-		 * Was this inode dirtied after sync_sb_inodes was called?
-		 * This keeps sync from extra jobs and livelock.
-		 */
-		if (inode_dirtied_after(inode, start))
-			break;
-
-		/* Is another pdflush already flushing this queue? */
-		if (current_is_pdflush() && !writeback_acquire(bdi))
-			break;
-
-		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
+		mapping = inode->i_mapping;
+		if (mapping->nrpages == 0)
+			continue;
 		__iget(inode);
-		pages_skipped = wbc->pages_skipped;
-		writeback_single_inode(inode, wbc);
-		if (current_is_pdflush())
-			writeback_release(bdi);
-		if (wbc->pages_skipped != pages_skipped) {
-			/*
-			 * writeback is not making progress due to locked
-			 * buffers.  Skip this inode for now.
-			 */
-			redirty_tail(inode);
-		}
 		spin_unlock(&inode_lock);
-		iput(inode);
-		cond_resched();
-		spin_lock(&inode_lock);
-		if (wbc->nr_to_write <= 0) {
-			wbc->more_io = 1;
-			break;
-		}
-		if (!list_empty(&sb->s_more_io))
-			wbc->more_io = 1;
-	}
-
-	if (sync) {
-		struct inode *inode, *old_inode = NULL;
-
 		/*
-		 * Data integrity sync. Must wait for all pages under writeback,
-		 * because there may have been pages dirtied before our sync
-		 * call, but which had writeout started before we write it out.
-		 * In which case, the inode may not be on the dirty list, but
-		 * we still have to wait for that writeout.
+		 * We hold a reference to 'inode' so it couldn't have
+		 * been removed from s_inodes list while we dropped the
+		 * inode_lock.  We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it
+		 * under inode_lock. So we keep the reference and iput
+		 * it later.
 		 */
-		list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-			struct address_space *mapping;
-
-			if (inode->i_state &
-					(I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
-				continue;
-			mapping = inode->i_mapping;
-			if (mapping->nrpages == 0)
-				continue;
-			__iget(inode);
-			spin_unlock(&inode_lock);
-			/*
-			 * We hold a reference to 'inode' so it couldn't have
-			 * been removed from s_inodes list while we dropped the
-			 * inode_lock.  We cannot iput the inode now as we can
-			 * be holding the last reference and we cannot iput it
-			 * under inode_lock. So we keep the reference and iput
-			 * it later.
-			 */
-			iput(old_inode);
-			old_inode = inode;
-
-			filemap_fdatawait(mapping);
-
-			cond_resched();
-
-			spin_lock(&inode_lock);
-		}
-		spin_unlock(&inode_lock);
 		iput(old_inode);
-	} else
-		spin_unlock(&inode_lock);
+		old_inode = inode;
 
-	return;		/* Leave any unwritten inodes on s_io */
-}
-EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
+		filemap_fdatawait(mapping);
 
-static void sync_sb_inodes(struct super_block *sb,
-				struct writeback_control *wbc)
-{
-	generic_sync_sb_inodes(sb, wbc);
-}
+		cond_resched();
 
-/*
- * Start writeback of dirty pagecache data against all unlocked inodes.
- *
- * Note:
- * We don't need to grab a reference to superblock here. If it has non-empty
- * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
- * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
- * empty. Since __sync_single_inode() regains inode_lock before it finally moves
- * inode from superblock lists we are OK.
- *
- * If `older_than_this' is non-zero then only flush inodes which have a
- * flushtime older than *older_than_this.
- *
- * If `bdi' is non-zero then we will scan the first inode against each
- * superblock until we find the matching ones.  One group will be the dirty
- * inodes against a filesystem.  Then when we hit the dummy blockdev superblock,
- * sync_sb_inodes will seekout the blockdev which matches `bdi'.  Maybe not
- * super-efficient but we're about to do a ton of I/O...
- */
-void
-writeback_inodes(struct writeback_control *wbc)
-{
-	struct super_block *sb;
-
-	might_sleep();
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry_reverse(sb, &super_blocks, s_list) {
-		if (sb_has_dirty_inodes(sb)) {
-			/* we're making our own get_super here */
-			sb->s_count++;
-			spin_unlock(&sb_lock);
-			/*
-			 * If we can't get the readlock, there's no sense in
-			 * waiting around, most of the time the FS is going to
-			 * be unmounted by the time it is released.
-			 */
-			if (down_read_trylock(&sb->s_umount)) {
-				if (sb->s_root)
-					sync_sb_inodes(sb, wbc);
-				up_read(&sb->s_umount);
-			}
-			spin_lock(&sb_lock);
-			if (__put_super_and_need_restart(sb))
-				goto restart;
-		}
-		if (wbc->nr_to_write <= 0)
-			break;
+		spin_lock(&inode_lock);
 	}
-	spin_unlock(&sb_lock);
+	spin_unlock(&inode_lock);
+	iput(old_inode);
 }
 
-/*
- * writeback and wait upon the filesystem's dirty inodes.  The caller will
- * do this in two passes - one to write, and one to wait.
+/**
+ * writeback_inodes_sb	-	writeback dirty inodes from given super_block
+ * @sb: the superblock
  *
- * A finite limit is set on the number of pages which will be written.
- * To prevent infinite livelock of sys_sync().
- *
- * We add in the number of potentially dirty inodes, because each inode write
- * can dirty pagecache in the underlying blockdev.
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for completion of the IO it submits. The number of pages submitted is
+ * returned.
  */
-void sync_inodes_sb(struct super_block *sb, int wait)
+long writeback_inodes_sb(struct super_block *sb)
 {
 	struct writeback_control wbc = {
-		.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_NONE,
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_NONE,
 		.range_start	= 0,
 		.range_end	= LLONG_MAX,
 	};
+	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+	long nr_to_write;
 
-	if (!wait) {
-		unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
-		unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-
-		wbc.nr_to_write = nr_dirty + nr_unstable +
+	nr_to_write = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
-	} else
-		wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
 
-	sync_sb_inodes(sb, &wbc);
+	wbc.nr_to_write = nr_to_write;
+	bdi_writeback_all(&wbc);
+	return nr_to_write - wbc.nr_to_write;
 }
+EXPORT_SYMBOL(writeback_inodes_sb);
+
+/**
+ * sync_inodes_sb	-	sync sb inode pages
+ * @sb: the superblock
+ *
+ * This function writes and waits on any dirty inode belonging to this
+ * super_block. The number of pages synced is returned.
+ */
+long sync_inodes_sb(struct super_block *sb)
+{
+	struct writeback_control wbc = {
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_ALL,
+		.range_start	= 0,
+		.range_end	= LLONG_MAX,
+	};
+	long nr_to_write = LONG_MAX; /* doesn't actually matter */
+
+	wbc.nr_to_write = nr_to_write;
+	bdi_writeback_all(&wbc);
+	wait_sb_inodes(&wbc);
+	return nr_to_write - wbc.nr_to_write;
+}
+EXPORT_SYMBOL(sync_inodes_sb);
 
 /**
  * write_inode_now	-	write an inode to disk
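
For filesystems, the net effect of the fs-writeback.c rewrite is the pair
of superblock-level entry points above: writeback_inodes_sb() starts
non-blocking (WB_SYNC_NONE) writeback and sizes its own page budget, while
sync_inodes_sb() writes and waits (WB_SYNC_ALL).  A sketch of the retry
pattern this enables (UBIFS makes exactly this conversion further down;
example_shrink() itself is hypothetical):

	static long example_shrink(struct super_block *sb)
	{
		long nr_written;

		nr_written = writeback_inodes_sb(sb);	/* async pass */
		if (!nr_written)
			nr_written = sync_inodes_sb(sb); /* blocking pass */
		return nr_written;
	}
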
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f91ccc4..4567db6 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -801,6 +801,7 @@
 {
 	int err;
 
+	fc->bdi.name = "fuse";
 	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	fc->bdi.unplug_io_fn = default_unplug_io_fn;
 	/* fuse does it's own writeback accounting */
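
The one-line bdi.name additions here and in the hunks below (hugetlbfs,
nfs, ocfs2-dlmfs, ramfs, sysfs, ubifs) all follow the same pattern: every
backing_dev_info now identifies itself, which feeds the "bdi-%s not
registered" diagnostic added to __mark_inode_dirty() above.  The shape for
a statically defined bdi ("examplefs" is a hypothetical name):

	static struct backing_dev_info example_bdi = {
		.name		= "examplefs",
		.ra_pages	= 0,	/* no readahead */
		.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
	};
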
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cb88dac..a93b885 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -44,6 +44,7 @@
 static const struct inode_operations hugetlbfs_inode_operations;
 
 static struct backing_dev_info hugetlbfs_backing_dev_info = {
+	.name		= "hugetlbfs",
 	.ra_pages	= 0,	/* No readahead */
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 8fcb62392..7edb62e 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -258,7 +258,7 @@
 	return rc;
 }
 
-static int jffs2_check_acl(struct inode *inode, int mask)
+int jffs2_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl;
 	int rc;
@@ -274,11 +274,6 @@
 	return -EAGAIN;
 }
 
-int jffs2_permission(struct inode *inode, int mask)
-{
-	return generic_permission(inode, mask, jffs2_check_acl);
-}
-
 int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
 {
 	struct posix_acl *acl, *clone;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index fc929f2..f0ba63e 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@
 
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 
-extern int jffs2_permission(struct inode *, int);
+extern int jffs2_check_acl(struct inode *, int);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
 extern int jffs2_init_acl_post(struct inode *);
@@ -36,7 +36,7 @@
 
 #else
 
-#define jffs2_permission			(NULL)
+#define jffs2_check_acl				(NULL)
 #define jffs2_acl_chmod(inode)			(0)
 #define jffs2_init_acl_pre(dir_i,inode,mode)	(0)
 #define jffs2_init_acl_post(inode)		(0)
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 6f60cc9..7aa4417 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -55,7 +55,7 @@
 	.rmdir =	jffs2_rmdir,
 	.mknod =	jffs2_mknod,
 	.rename =	jffs2_rename,
-	.permission =	jffs2_permission,
+	.check_acl =	jffs2_check_acl,
 	.setattr =	jffs2_setattr,
 	.setxattr =	jffs2_setxattr,
 	.getxattr =	jffs2_getxattr,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 23c9475..b7b74e2 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -56,7 +56,7 @@
 
 const struct inode_operations jffs2_file_inode_operations =
 {
-	.permission =	jffs2_permission,
+	.check_acl =	jffs2_check_acl,
 	.setattr =	jffs2_setattr,
 	.setxattr =	jffs2_setxattr,
 	.getxattr =	jffs2_getxattr,
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b7339c3..4ec11e8 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -21,7 +21,7 @@
 {
 	.readlink =	generic_readlink,
 	.follow_link =	jffs2_follow_link,
-	.permission =	jffs2_permission,
+	.check_acl =	jffs2_check_acl,
 	.setattr =	jffs2_setattr,
 	.setxattr =	jffs2_setxattr,
 	.getxattr =	jffs2_getxattr,
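
The jffs2 changes here, and the jfs and xfs changes below, are all one
conversion: instead of each filesystem wrapping generic_permission() in
its own ->permission method just to supply an ACL checker, the checker is
published directly as ->check_acl and the VFS passes it to
generic_permission() itself (see the inode_permission() hunk in
fs/namei.c below).  The !CONFIG case defines the symbol to NULL, so the
initializer compiles away.  A hypothetical filesystem following the same
conversion:

	int examplefs_check_acl(struct inode *inode, int mask);

	static const struct inode_operations examplefs_iops = {
		.check_acl	= examplefs_check_acl,	/* was .permission */
	};
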
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index a29c7c3..d66477c 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,7 +114,7 @@
 	return rc;
 }
 
-static int jfs_check_acl(struct inode *inode, int mask)
+int jfs_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
 
@@ -129,11 +129,6 @@
 	return -EAGAIN;
 }
 
-int jfs_permission(struct inode *inode, int mask)
-{
-	return generic_permission(inode, mask, jfs_check_acl);
-}
-
 int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
 {
 	struct posix_acl *acl = NULL;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 7f6063a..2b70fa7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -96,7 +96,7 @@
 	.removexattr	= jfs_removexattr,
 #ifdef CONFIG_JFS_POSIX_ACL
 	.setattr	= jfs_setattr,
-	.permission	= jfs_permission,
+	.check_acl	= jfs_check_acl,
 #endif
 };
 
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 88475f1..b07bd41 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
 
 #ifdef CONFIG_JFS_POSIX_ACL
 
-int jfs_permission(struct inode *, int);
+int jfs_check_acl(struct inode *, int);
 int jfs_init_acl(tid_t, struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
 
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 514ee2e..c79a4270 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1543,7 +1543,7 @@
 	.removexattr	= jfs_removexattr,
 #ifdef CONFIG_JFS_POSIX_ACL
 	.setattr	= jfs_setattr,
-	.permission	= jfs_permission,
+	.check_acl	= jfs_check_acl,
 #endif
 };
 
diff --git a/fs/locks.c b/fs/locks.c
index b6440f5..52366e8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1591,7 +1591,7 @@
 	if (can_sleep)
 		lock->fl_flags |= FL_SLEEP;
 
-	error = security_file_lock(filp, cmd);
+	error = security_file_lock(filp, lock->fl_type);
 	if (error)
 		goto out_free;
 
diff --git a/fs/namei.c b/fs/namei.c
index 1f13751..d11f404 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,6 +169,36 @@
 EXPORT_SYMBOL(putname);
 #endif
 
+/*
+ * This does basic POSIX ACL permission checking
+ */
+static int acl_permission_check(struct inode *inode, int mask,
+		int (*check_acl)(struct inode *inode, int mask))
+{
+	umode_t			mode = inode->i_mode;
+
+	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
+
+	if (current_fsuid() == inode->i_uid)
+		mode >>= 6;
+	else {
+		if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
+			int error = check_acl(inode, mask);
+			if (error != -EAGAIN)
+				return error;
+		}
+
+		if (in_group_p(inode->i_gid))
+			mode >>= 3;
+	}
+
+	/*
+	 * If the DACs are ok we don't need any capability check.
+	 */
+	if ((mask & ~mode) == 0)
+		return 0;
+	return -EACCES;
+}
 
 /**
  * generic_permission  -  check for access rights on a Posix-like filesystem
@@ -184,32 +214,15 @@
 int generic_permission(struct inode *inode, int mask,
 		int (*check_acl)(struct inode *inode, int mask))
 {
-	umode_t			mode = inode->i_mode;
-
-	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
-
-	if (current_fsuid() == inode->i_uid)
-		mode >>= 6;
-	else {
-		if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
-			int error = check_acl(inode, mask);
-			if (error == -EACCES)
-				goto check_capabilities;
-			else if (error != -EAGAIN)
-				return error;
-		}
-
-		if (in_group_p(inode->i_gid))
-			mode >>= 3;
-	}
+	int ret;
 
 	/*
-	 * If the DACs are ok we don't need any capability check.
+	 * Do the basic POSIX ACL permission checks.
 	 */
-	if ((mask & ~mode) == 0)
-		return 0;
+	ret = acl_permission_check(inode, mask, check_acl);
+	if (ret != -EACCES)
+		return ret;
 
- check_capabilities:
 	/*
 	 * Read/write DACs are always overridable.
 	 * Executable DACs are overridable if at least one exec bit is set.
@@ -262,7 +275,7 @@
 	if (inode->i_op->permission)
 		retval = inode->i_op->permission(inode, mask);
 	else
-		retval = generic_permission(inode, mask, NULL);
+		retval = generic_permission(inode, mask, inode->i_op->check_acl);
 
 	if (retval)
 		return retval;
@@ -432,29 +445,22 @@
  */
 static int exec_permission_lite(struct inode *inode)
 {
-	umode_t	mode = inode->i_mode;
+	int ret;
 
-	if (inode->i_op->permission)
-		return -EAGAIN;
-
-	if (current_fsuid() == inode->i_uid)
-		mode >>= 6;
-	else if (in_group_p(inode->i_gid))
-		mode >>= 3;
-
-	if (mode & MAY_EXEC)
+	if (inode->i_op->permission) {
+		ret = inode->i_op->permission(inode, MAY_EXEC);
+		if (!ret)
+			goto ok;
+		return ret;
+	}
+	ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
+	if (!ret)
 		goto ok;
 
-	if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
+	if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
 		goto ok;
 
-	if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
-		goto ok;
-
-	if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH))
-		goto ok;
-
-	return -EACCES;
+	return ret;
 ok:
 	return security_inode_permission(inode, MAY_EXEC);
 }
@@ -853,12 +859,6 @@
 
 		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode);
-		if (err == -EAGAIN)
-			err = inode_permission(nd->path.dentry->d_inode,
-					       MAY_EXEC);
-		if (!err)
-			err = ima_path_check(&nd->path, MAY_EXEC,
-				             IMA_COUNT_UPDATE);
  		if (err)
 			break;
 
@@ -1533,9 +1533,11 @@
 	if (error)
 		return error;
 
-	error = ima_path_check(path,
-			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC),
+	error = ima_path_check(path, acc_mode ?
+			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
+			       ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
 			       IMA_COUNT_UPDATE);
+
 	if (error)
 		return error;
 	/*
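
acl_permission_check() factors the classic owner/group/other walk out of
generic_permission() so that exec_permission_lite() can share it; note
that ->check_acl is consulted only when the caller is not the owner and
the group bits might matter.  A worked example of the mask arithmetic,
for reference (MAY_READ = 4, MAY_WRITE = 2, MAY_EXEC = 1):

	/* mode 0750, requester is the owner, wants read + exec:
	 *	mode >>= 6		-> 07
	 *	mask = 4 | 1 = 05
	 *	mask & ~mode = 0	-> granted
	 * mode 0750, requester in the group, wants write:
	 *	mode >>= 3		-> 05
	 *	mask = 02
	 *	mask & ~mode = 02	-> -EACCES (capability checks next)
	 */
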
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8d25ccb..c6be84a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -879,6 +879,7 @@
 		server->rsize = NFS_MAX_FILE_IO_SIZE;
 	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
+	server->backing_dev_info.name = "nfs";
 	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
 
 	if (server->wsize > max_rpc_payload)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 5573508..36fcabbf 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -34,6 +34,8 @@
 	int flags = nfsexp_flags(rqstp, exp);
 	int ret;
 
+	validate_process_creds();
+
 	/* discard any old override before preparing the new set */
 	revert_creds(get_cred(current->real_cred));
 	new = prepare_creds();
@@ -86,8 +88,10 @@
 	else
 		new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
 							new->cap_permitted);
+	validate_process_creds();
 	put_cred(override_creds(new));
 	put_cred(new);
+	validate_process_creds();
 	return 0;
 
 oom:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 492c79b..24d58ad 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -496,7 +496,9 @@
 		/* Lock the export hash tables for reading. */
 		exp_readlock();
 
+		validate_process_creds();
 		svc_process(rqstp);
+		validate_process_creds();
 
 		/* Unlock export hash tables */
 		exp_readunlock();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23341c1..8fa09bf 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -684,6 +684,8 @@
 	__be32		err;
 	int		host_err;
 
+	validate_process_creds();
+
 	/*
 	 * If we get here, then the client has already done an "open",
 	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
@@ -740,6 +742,7 @@
 out_nfserr:
 	err = nfserrno(host_err);
 out:
+	validate_process_creds();
 	return err;
 }
 
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 1c9efb4..02bf178 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -325,6 +325,7 @@
 }
 
 static struct backing_dev_info dlmfs_backing_dev_info = {
+	.name		= "ocfs2-dlmfs",
 	.ra_pages	= 0,	/* No readahead */
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
diff --git a/fs/open.c b/fs/open.c
index dd98e80..31191bf 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -199,7 +199,7 @@
 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 	struct file *filp)
 {
-	int err;
+	int ret;
 	struct iattr newattrs;
 
 	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
@@ -214,12 +214,14 @@
 	}
 
 	/* Remove suid/sgid on truncate too */
-	newattrs.ia_valid |= should_remove_suid(dentry);
+	ret = should_remove_suid(dentry);
+	if (ret)
+		newattrs.ia_valid |= ret | ATTR_FORCE;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	err = notify_change(dentry, &newattrs);
+	ret = notify_change(dentry, &newattrs);
 	mutex_unlock(&dentry->d_inode->i_mutex);
-	return err;
+	return ret;
 }
 
 static long do_sys_truncate(const char __user *pathname, loff_t length)
@@ -957,6 +959,8 @@
 	int error;
 	struct file *f;
 
+	validate_creds(cred);
+
 	/*
 	 * We must always pass in a valid mount pointer.   Historically
 	 * callers got away with not passing it, but we must enforce this at
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 0ff7566..a7f0110 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -46,6 +46,7 @@
 static const struct inode_operations ramfs_dir_inode_operations;
 
 static struct backing_dev_info ramfs_backing_dev_info = {
+	.name		= "ramfs",
 	.ra_pages	= 0,	/* No readahead */
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK |
 			  BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
diff --git a/fs/super.c b/fs/super.c
index 2761d3e..9cda337 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -62,9 +62,6 @@
 			s = NULL;
 			goto out;
 		}
-		INIT_LIST_HEAD(&s->s_dirty);
-		INIT_LIST_HEAD(&s->s_io);
-		INIT_LIST_HEAD(&s->s_more_io);
 		INIT_LIST_HEAD(&s->s_files);
 		INIT_LIST_HEAD(&s->s_instances);
 		INIT_HLIST_HEAD(&s->s_anon);
@@ -171,7 +168,7 @@
  *	Drops a temporary reference, frees superblock if there's no
  *	references left.
  */
-static void put_super(struct super_block *sb)
+void put_super(struct super_block *sb)
 {
 	spin_lock(&sb_lock);
 	__put_super(sb);
diff --git a/fs/sync.c b/fs/sync.c
index 3422ba6..103cc7f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -19,20 +19,22 @@
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
 /*
- * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
- * just dirties buffers with inodes so we have to submit IO for these buffers
- * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
- * case write_inode() functions do sync_dirty_buffer() and thus effectively
- * write one block at a time.
+ * Do the filesystem syncing work. For simple filesystems
+ * writeback_inodes_sb(sb) just dirties buffers containing the inodes, so we
+ * have to submit IO for these buffers via __sync_blockdev(). This also
+ * speeds up the wait == 1 case since in that case write_inode() functions
+ * do sync_dirty_buffer() and thus effectively write one block at a time.
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
 	/* Avoid doing twice syncing and cache pruning for quota sync */
-	if (!wait)
+	if (!wait) {
 		writeout_quota_sb(sb, -1);
-	else
+		writeback_inodes_sb(sb);
+	} else {
 		sync_quota_sb(sb, -1);
-	sync_inodes_sb(sb, wait);
+		sync_inodes_sb(sb);
+	}
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
 	return __sync_blockdev(sb->s_bdev, wait);
@@ -118,7 +120,7 @@
  */
 SYSCALL_DEFINE0(sync)
 {
-	wakeup_pdflush(0);
+	wakeup_flusher_threads(0);
 	sync_filesystems(0);
 	sync_filesystems(1);
 	if (unlikely(laptop_mode))
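
	/* The resulting sequence for sync(2): one cheap pass to get all
	 * devices busy in parallel, then a blocking pass to guarantee
	 * completion:
	 *
	 *	wakeup_flusher_threads(0);	kick every bdi (async)
	 *	sync_filesystems(0);		pass 1: start the IO
	 *	sync_filesystems(1);		pass 2: write and wait
	 */
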
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 14f2d71..0050fc4 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -760,6 +760,7 @@
 const struct inode_operations sysfs_dir_inode_operations = {
 	.lookup		= sysfs_lookup,
 	.setattr	= sysfs_setattr,
+	.setxattr	= sysfs_setxattr,
 };
 
 static void remove_dir(struct sysfs_dirent *sd)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 555f0ff..e28cecf 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -18,6 +18,8 @@
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
 #include "sysfs.h"
 
 extern struct super_block * sysfs_sb;
@@ -29,12 +31,14 @@
 };
 
 static struct backing_dev_info sysfs_backing_dev_info = {
+	.name		= "sysfs",
 	.ra_pages	= 0,	/* No readahead */
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
 static const struct inode_operations sysfs_inode_operations ={
 	.setattr	= sysfs_setattr,
+	.setxattr	= sysfs_setxattr,
 };
 
 int __init sysfs_inode_init(void)
@@ -42,18 +46,37 @@
 	return bdi_init(&sysfs_backing_dev_info);
 }
 
+struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
+{
+	struct sysfs_inode_attrs *attrs;
+	struct iattr *iattrs;
+
+	attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
+	if (!attrs)
+		return NULL;
+	iattrs = &attrs->ia_iattr;
+
+	/* assign default attributes */
+	iattrs->ia_mode = sd->s_mode;
+	iattrs->ia_uid = 0;
+	iattrs->ia_gid = 0;
+	iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
+
+	return attrs;
+}
 int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
 {
 	struct inode * inode = dentry->d_inode;
 	struct sysfs_dirent * sd = dentry->d_fsdata;
-	struct iattr * sd_iattr;
+	struct sysfs_inode_attrs *sd_attrs;
+	struct iattr *iattrs;
 	unsigned int ia_valid = iattr->ia_valid;
 	int error;
 
 	if (!sd)
 		return -EINVAL;
 
-	sd_iattr = sd->s_iattr;
+	sd_attrs = sd->s_iattr;
 
 	error = inode_change_ok(inode, iattr);
 	if (error)
@@ -65,42 +88,77 @@
 	if (error)
 		return error;
 
-	if (!sd_iattr) {
+	if (!sd_attrs) {
 		/* setting attributes for the first time, allocate now */
-		sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
-		if (!sd_iattr)
+		sd_attrs = sysfs_init_inode_attrs(sd);
+		if (!sd_attrs)
 			return -ENOMEM;
-		/* assign default attributes */
-		sd_iattr->ia_mode = sd->s_mode;
-		sd_iattr->ia_uid = 0;
-		sd_iattr->ia_gid = 0;
-		sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
-		sd->s_iattr = sd_iattr;
+		sd->s_iattr = sd_attrs;
+	} else {
+		/* attributes were changed at least once in the past */
+		iattrs = &sd_attrs->ia_iattr;
+
+		if (ia_valid & ATTR_UID)
+			iattrs->ia_uid = iattr->ia_uid;
+		if (ia_valid & ATTR_GID)
+			iattrs->ia_gid = iattr->ia_gid;
+		if (ia_valid & ATTR_ATIME)
+			iattrs->ia_atime = timespec_trunc(iattr->ia_atime,
+					inode->i_sb->s_time_gran);
+		if (ia_valid & ATTR_MTIME)
+			iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime,
+					inode->i_sb->s_time_gran);
+		if (ia_valid & ATTR_CTIME)
+			iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime,
+					inode->i_sb->s_time_gran);
+		if (ia_valid & ATTR_MODE) {
+			umode_t mode = iattr->ia_mode;
+
+			if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+				mode &= ~S_ISGID;
+			iattrs->ia_mode = sd->s_mode = mode;
+		}
 	}
+	return error;
+}
 
-	/* attributes were changed atleast once in past */
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+		size_t size, int flags)
+{
+	struct sysfs_dirent *sd = dentry->d_fsdata;
+	struct sysfs_inode_attrs *iattrs;
+	void *secdata;
+	int error;
+	u32 secdata_len = 0;
 
-	if (ia_valid & ATTR_UID)
-		sd_iattr->ia_uid = iattr->ia_uid;
-	if (ia_valid & ATTR_GID)
-		sd_iattr->ia_gid = iattr->ia_gid;
-	if (ia_valid & ATTR_ATIME)
-		sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
-						inode->i_sb->s_time_gran);
-	if (ia_valid & ATTR_MTIME)
-		sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
-						inode->i_sb->s_time_gran);
-	if (ia_valid & ATTR_CTIME)
-		sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
-						inode->i_sb->s_time_gran);
-	if (ia_valid & ATTR_MODE) {
-		umode_t mode = iattr->ia_mode;
+	if (!sd)
+		return -EINVAL;
+	if (!sd->s_iattr)
+		sd->s_iattr = sysfs_init_inode_attrs(sd);
+	if (!sd->s_iattr)
+		return -ENOMEM;
 
-		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-			mode &= ~S_ISGID;
-		sd_iattr->ia_mode = sd->s_mode = mode;
-	}
+	iattrs = sd->s_iattr;
 
+	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
+		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+		error = security_inode_setsecurity(dentry->d_inode, suffix,
+						value, size, flags);
+		if (error)
+			goto out;
+		error = security_inode_getsecctx(dentry->d_inode,
+						&secdata, &secdata_len);
+		if (error)
+			goto out;
+		if (iattrs->ia_secdata)
+			security_release_secctx(iattrs->ia_secdata,
+						iattrs->ia_secdata_len);
+		iattrs->ia_secdata = secdata;
+		iattrs->ia_secdata_len = secdata_len;
+
+	} else
+		return -EINVAL;
+out:
 	return error;
 }
 
@@ -146,6 +204,7 @@
 static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
 	struct bin_attribute *bin_attr;
+	struct sysfs_inode_attrs *iattrs;
 
 	inode->i_private = sysfs_get(sd);
 	inode->i_mapping->a_ops = &sysfs_aops;
@@ -154,16 +213,20 @@
 	inode->i_ino = sd->s_ino;
 	lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
 
-	if (sd->s_iattr) {
+	iattrs = sd->s_iattr;
+	if (iattrs) {
 		/* sysfs_dirent has non-default attributes
 		 * get them for the new inode from persistent copy
 		 * in sysfs_dirent
 		 */
-		set_inode_attr(inode, sd->s_iattr);
+		set_inode_attr(inode, &iattrs->ia_iattr);
+		if (iattrs->ia_secdata)
+			security_inode_notifysecctx(inode,
+						iattrs->ia_secdata,
+						iattrs->ia_secdata_len);
 	} else
 		set_default_inode_attr(inode, sd->s_mode);
 
-
 	/* initialize inode according to type */
 	switch (sysfs_type(sd)) {
 	case SYSFS_DIR:
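
sysfs thus accepts exactly one class of extended attributes: names under
the "security." prefix, which are routed to the active LSM via
security_inode_setsecurity().  The resulting context is cached in
sysfs_inode_attrs so later inode instantiations can replay it through
security_inode_notifysecctx() (see sysfs_init_inode above).  A
hypothetical userspace use (the path and context are placeholders, and
an LSM that implements setsecurity must be active):

	#include <sys/xattr.h>

	static int relabel_sysfs(const char *path)
	{
		static const char ctx[] = "system_u:object_r:sysfs_t:s0";

		return setxattr(path, "security.selinux", ctx,
				sizeof(ctx) - 1, 0);
	}
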
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 1d897ad..c5081ad 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/namei.h>
 #include <linux/mutex.h>
+#include <linux/security.h>
 
 #include "sysfs.h"
 
@@ -209,6 +210,7 @@
 }
 
 const struct inode_operations sysfs_symlink_inode_operations = {
+	.setxattr = sysfs_setxattr,
 	.readlink = generic_readlink,
 	.follow_link = sysfs_follow_link,
 	.put_link = sysfs_put_link,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3fa0d984..af4c4e7 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,8 @@
  * This file is released under the GPLv2.
  */
 
+#include <linux/fs.h>
+
 struct sysfs_open_dirent;
 
 /* type-specific structures for sysfs_dirent->s_* union members */
@@ -31,6 +33,12 @@
 	struct hlist_head	buffers;
 };
 
+struct sysfs_inode_attrs {
+	struct iattr	ia_iattr;
+	void		*ia_secdata;
+	u32		ia_secdata_len;
+};
+
 /*
  * sysfs_dirent - the building block of sysfs hierarchy.  Each and
  * every sysfs node is represented by single sysfs_dirent.
@@ -56,7 +64,7 @@
 	unsigned int		s_flags;
 	ino_t			s_ino;
 	umode_t			s_mode;
-	struct iattr		*s_iattr;
+	struct sysfs_inode_attrs *s_iattr;
 };
 
 #define SD_DEACTIVATED_BIAS		INT_MIN
@@ -148,6 +156,8 @@
 struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
 void sysfs_delete_inode(struct inode *inode);
 int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+		size_t size, int flags);
 int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
 int sysfs_inode_init(void);
 
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eaf6d89..1c8991b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -65,26 +65,14 @@
 static int shrink_liability(struct ubifs_info *c, int nr_to_write)
 {
 	int nr_written;
-	struct writeback_control wbc = {
-		.sync_mode   = WB_SYNC_NONE,
-		.range_end   = LLONG_MAX,
-		.nr_to_write = nr_to_write,
-	};
 
-	generic_sync_sb_inodes(c->vfs_sb, &wbc);
-	nr_written = nr_to_write - wbc.nr_to_write;
-
+	nr_written = writeback_inodes_sb(c->vfs_sb);
 	if (!nr_written) {
 		/*
 		 * Re-try again but wait on pages/inodes which are being
 		 * written-back concurrently (e.g., by pdflush).
 		 */
-		memset(&wbc, 0, sizeof(struct writeback_control));
-		wbc.sync_mode   = WB_SYNC_ALL;
-		wbc.range_end   = LLONG_MAX;
-		wbc.nr_to_write = nr_to_write;
-		generic_sync_sb_inodes(c->vfs_sb, &wbc);
-		nr_written = nr_to_write - wbc.nr_to_write;
+		nr_written = sync_inodes_sb(c->vfs_sb);
 	}
 
 	dbg_budg("%d pages were written back", nr_written);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 26d2e0d..51763aa 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -438,12 +438,6 @@
 {
 	int i, err;
 	struct ubifs_info *c = sb->s_fs_info;
-	struct writeback_control wbc = {
-		.sync_mode   = WB_SYNC_ALL,
-		.range_start = 0,
-		.range_end   = LLONG_MAX,
-		.nr_to_write = LONG_MAX,
-	};
 
 	/*
 	 * Zero @wait is just an advisory thing to help the file system shove
@@ -462,7 +456,7 @@
 	 * the user be able to get more accurate results of 'statfs()' after
 	 * they synchronize the file system.
 	 */
-	generic_sync_sb_inodes(sb, &wbc);
+	sync_inodes_sb(sb);
 
 	/*
 	 * Synchronize write buffers, because 'ubifs_run_commit()' does not
@@ -1971,6 +1965,7 @@
 	 *
 	 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
 	 */
+	c->bdi.name = "ubifs",
 	c->bdi.capabilities = BDI_CAP_MAP_COPY;
 	c->bdi.unplug_io_fn = default_unplug_io_fn;
 	err  = bdi_init(&c->bdi);
diff --git a/fs/xattr.c b/fs/xattr.c
index 1c3d0af..6d4f6d3 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -66,22 +66,28 @@
 	return inode_permission(inode, mask);
 }
 
-int
-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
-		size_t size, int flags)
+/**
+ * __vfs_setxattr_noperm - perform setxattr operation without performing
+ * permission checks.
+ *
+ * @dentry: object to perform setxattr on
+ * @name: xattr name to set
+ * @value: value to set @name to
+ * @size: size of @value
+ * @flags: flags to pass into filesystem operations
+ *
+ * Returns the result of the internal setxattr or setsecurity operations.
+ *
+ * This function requires the caller to lock the inode's i_mutex before it
+ * is executed. It also assumes that the caller will make the appropriate
+ * permission checks.
+ */
+int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags)
 {
 	struct inode *inode = dentry->d_inode;
-	int error;
+	int error = -EOPNOTSUPP;
 
-	error = xattr_permission(inode, name, MAY_WRITE);
-	if (error)
-		return error;
-
-	mutex_lock(&inode->i_mutex);
-	error = security_inode_setxattr(dentry, name, value, size, flags);
-	if (error)
-		goto out;
-	error = -EOPNOTSUPP;
 	if (inode->i_op->setxattr) {
 		error = inode->i_op->setxattr(dentry, name, value, size, flags);
 		if (!error) {
@@ -97,6 +103,29 @@
 		if (!error)
 			fsnotify_xattr(dentry);
 	}
+
+	return error;
+}
+
+
+int
+vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+		size_t size, int flags)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = xattr_permission(inode, name, MAY_WRITE);
+	if (error)
+		return error;
+
+	mutex_lock(&inode->i_mutex);
+	error = security_inode_setxattr(dentry, name, value, size, flags);
+	if (error)
+		goto out;
+
+	error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+
 out:
 	mutex_unlock(&inode->i_mutex);
 	return error;
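
Splitting __vfs_setxattr_noperm() out of vfs_setxattr() lets callers that
have already passed their own security checks, and that hold i_mutex, set
attributes without re-entering security_inode_setxattr() (for instance, a
security module writing back a context it just computed).  A sketch of
the caller contract (example_set_xattr() itself is hypothetical):

	static int example_set_xattr(struct dentry *dentry, const char *name,
				     const void *value, size_t size)
	{
		struct inode *inode = dentry->d_inode;
		int error;

		mutex_lock(&inode->i_mutex);
		error = __vfs_setxattr_noperm(dentry, name, value, size, 0);
		mutex_unlock(&inode->i_mutex);
		return error;
	}
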
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 8070b34..6c32f1d 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -485,14 +485,6 @@
 }
 
 STATIC int
-xfs_vn_permission(
-	struct inode		*inode,
-	int			mask)
-{
-	return generic_permission(inode, mask, xfs_check_acl);
-}
-
-STATIC int
 xfs_vn_getattr(
 	struct vfsmount		*mnt,
 	struct dentry		*dentry,
@@ -696,7 +688,7 @@
 }
 
 static const struct inode_operations xfs_inode_operations = {
-	.permission		= xfs_vn_permission,
+	.check_acl		= xfs_check_acl,
 	.truncate		= xfs_vn_truncate,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
@@ -724,7 +716,7 @@
 	.rmdir			= xfs_vn_unlink,
 	.mknod			= xfs_vn_mknod,
 	.rename			= xfs_vn_rename,
-	.permission		= xfs_vn_permission,
+	.check_acl		= xfs_check_acl,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
 	.setxattr		= generic_setxattr,
@@ -749,7 +741,7 @@
 	.rmdir			= xfs_vn_unlink,
 	.mknod			= xfs_vn_mknod,
 	.rename			= xfs_vn_rename,
-	.permission		= xfs_vn_permission,
+	.check_acl		= xfs_check_acl,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
 	.setxattr		= generic_setxattr,
@@ -762,7 +754,7 @@
 	.readlink		= generic_readlink,
 	.follow_link		= xfs_vn_follow_link,
 	.put_link		= xfs_vn_put_link,
-	.permission		= xfs_vn_permission,
+	.check_acl		= xfs_check_acl,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
 	.setxattr		= generic_setxattr,
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 1d52425..f169bcb 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -13,6 +13,8 @@
 #include <linux/proportions.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/writeback.h>
 #include <asm/atomic.h>
 
 struct page;
@@ -23,9 +25,11 @@
  * Bits in backing_dev_info.state
  */
 enum bdi_state {
-	BDI_pdflush,		/* A pdflush thread is working this device */
+	BDI_pending,		/* On its way to being activated */
+	BDI_wb_alloc,		/* Default embedded wb allocated */
 	BDI_async_congested,	/* The async (write) queue is getting full */
 	BDI_sync_congested,	/* The sync queue is getting full */
+	BDI_registered,		/* bdi_register() was done */
 	BDI_unused,		/* Available bits start here */
 };
 
@@ -39,7 +43,22 @@
 
 #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
+struct bdi_writeback {
+	struct list_head list;			/* hangs off the bdi */
+
+	struct backing_dev_info *bdi;		/* our parent bdi */
+	unsigned int nr;
+
+	unsigned long last_old_flush;		/* last old data flush */
+
+	struct task_struct	*task;		/* writeback task */
+	struct list_head	b_dirty;	/* dirty inodes */
+	struct list_head	b_io;		/* parked for writeback */
+	struct list_head	b_more_io;	/* parked for more writeback */
+};
+
 struct backing_dev_info {
+	struct list_head bdi_list;
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
@@ -48,6 +67,8 @@
 	void (*unplug_io_fn)(struct backing_dev_info *, struct page *);
 	void *unplug_io_data;
 
+	char *name;
+
 	struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];
 
 	struct prop_local_percpu completions;
@@ -56,6 +77,14 @@
 	unsigned int min_ratio;
 	unsigned int max_ratio, max_prop_frac;
 
+	struct bdi_writeback wb;  /* default writeback info for this bdi */
+	spinlock_t wb_lock;	  /* protects update side of wb_list */
+	struct list_head wb_list; /* the flusher threads hanging off this bdi */
+	unsigned long wb_mask;	  /* bitmask of registered tasks */
+	unsigned int wb_cnt;	  /* number of registered tasks */
+
+	struct list_head work_list;
+
 	struct device *dev;
 
 #ifdef CONFIG_DEBUG_FS
@@ -71,6 +100,19 @@
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
+void bdi_start_writeback(struct writeback_control *wbc);
+int bdi_writeback_task(struct bdi_writeback *wb);
+int bdi_has_dirty_io(struct backing_dev_info *bdi);
+
+extern spinlock_t bdi_lock;
+extern struct list_head bdi_list;
+
+static inline int wb_has_dirty_io(struct bdi_writeback *wb)
+{
+	return !list_empty(&wb->b_dirty) ||
+	       !list_empty(&wb->b_io) ||
+	       !list_empty(&wb->b_more_io);
+}
 
 static inline void __add_bdi_stat(struct backing_dev_info *bdi,
 		enum bdi_stat_item item, s64 amount)
@@ -261,6 +303,11 @@
 	return bdi->capabilities & BDI_CAP_SWAP_BACKED;
 }
 
+static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
+{
+	return bdi == &default_backing_dev_info;
+}
+
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
 	return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -276,4 +323,10 @@
 	return bdi_cap_swap_backed(mapping->backing_dev_info);
 }
 
+static inline int bdi_sched_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
 #endif		/* _LINUX_BACKING_DEV_H */
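
Two small helpers round out the header: wb_has_dirty_io() is the
per-writeback-thread analogue of bdi_has_dirty_io(), and bdi_sched_wait()
is a wait_on_bit() action for sleeping until a bdi state bit clears.  A
plausible use, matching how the flusher setup/teardown paths would
synchronize on BDI_pending (a sketch, not quoted from the patch):

	static void example_wait_pending(struct backing_dev_info *bdi)
	{
		wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
			    TASK_UNINTERRUPTIBLE);
	}
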
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4fa9996..24520a5 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -114,6 +114,13 @@
  */
 struct cred {
 	atomic_t	usage;
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	atomic_t	subscribers;	/* number of processes subscribed */
+	void		*put_addr;
+	unsigned	magic;
+#define CRED_MAGIC	0x43736564
+#define CRED_MAGIC_DEAD	0x44656144
+#endif
 	uid_t		uid;		/* real UID of the task */
 	gid_t		gid;		/* real GID of the task */
 	uid_t		suid;		/* saved UID of the task */
@@ -143,7 +150,9 @@
 };
 
 extern void __put_cred(struct cred *);
+extern void exit_creds(struct task_struct *);
 extern int copy_creds(struct task_struct *, unsigned long);
+extern struct cred *cred_alloc_blank(void);
 extern struct cred *prepare_creds(void);
 extern struct cred *prepare_exec_creds(void);
 extern struct cred *prepare_usermodehelper_creds(void);
@@ -158,6 +167,60 @@
 extern int set_create_files_as(struct cred *, struct inode *);
 extern void __init cred_init(void);
 
+/*
+ * check for validity of credentials
+ */
+#ifdef CONFIG_DEBUG_CREDENTIALS
+extern void __invalid_creds(const struct cred *, const char *, unsigned);
+extern void __validate_process_creds(struct task_struct *,
+				     const char *, unsigned);
+
+static inline bool creds_are_invalid(const struct cred *cred)
+{
+	if (cred->magic != CRED_MAGIC)
+		return true;
+	if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
+		return true;
+#ifdef CONFIG_SECURITY_SELINUX
+	if ((unsigned long) cred->security < PAGE_SIZE)
+		return true;
+	if ((*(u32*)cred->security & 0xffffff00) ==
+	    (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
+		return true;
+#endif
+	return false;
+}
+
+static inline void __validate_creds(const struct cred *cred,
+				    const char *file, unsigned line)
+{
+	if (unlikely(creds_are_invalid(cred)))
+		__invalid_creds(cred, file, line);
+}
+
+#define validate_creds(cred)				\
+do {							\
+	__validate_creds((cred), __FILE__, __LINE__);	\
+} while(0)
+
+#define validate_process_creds()				\
+do {								\
+	__validate_process_creds(current, __FILE__, __LINE__);	\
+} while(0)
+
+extern void validate_creds_for_do_exit(struct task_struct *);
+#else
+static inline void validate_creds(const struct cred *cred)
+{
+}
+static inline void validate_creds_for_do_exit(struct task_struct *tsk)
+{
+}
+static inline void validate_process_creds(void)
+{
+}
+#endif
+
 /**
  * get_new_cred - Get a reference on a new set of credentials
  * @cred: The new credentials to reference
@@ -186,7 +249,9 @@
  */
 static inline const struct cred *get_cred(const struct cred *cred)
 {
-	return get_new_cred((struct cred *) cred);
+	struct cred *nonconst_cred = (struct cred *) cred;
+	validate_creds(cred);
+	return get_new_cred(nonconst_cred);
 }
 
 /**
@@ -204,7 +269,7 @@
 {
 	struct cred *cred = (struct cred *) _cred;
 
-	BUG_ON(atomic_read(&(cred)->usage) <= 0);
+	validate_creds(cred);
 	if (atomic_dec_and_test(&(cred)->usage))
 		__put_cred(cred);
 }
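
When CONFIG_DEBUG_CREDENTIALS is enabled, validate_creds() and
validate_process_creds() record __FILE__/__LINE__ at each call site and BUG
via __invalid_creds() on failure; when it is disabled they compile to empty
inlines. A minimal sketch of a call site (the surrounding function is
hypothetical):

	/* Sketch: sanity-check credentials before relying on them. */
	static void example_check(const struct cred *cred)
	{
		validate_creds(cred);		/* checks magic, usage vs subscribers */
		validate_process_creds();	/* checks current's real/effective creds */
	}
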
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3972ffb..a79f483 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -715,7 +715,7 @@
 
 struct inode {
 	struct hlist_node	i_hash;
-	struct list_head	i_list;
+	struct list_head	i_list;		/* backing dev IO list */
 	struct list_head	i_sb_list;
 	struct list_head	i_dentry;
 	unsigned long		i_ino;
@@ -1336,9 +1336,6 @@
 	struct xattr_handler	**s_xattr;
 
 	struct list_head	s_inodes;	/* all inodes */
-	struct list_head	s_dirty;	/* dirty inodes */
-	struct list_head	s_io;		/* parked for writeback */
-	struct list_head	s_more_io;	/* parked for more writeback */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_files;
 	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
@@ -1528,6 +1525,7 @@
 	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int);
+	int (*check_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -1788,6 +1786,7 @@
 	struct vfsmount *mnt);
 extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
 int __put_super_and_need_restart(struct super_block *sb);
+void put_super(struct super_block *sb);
 
 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
 #define fops_get(fops) \
@@ -2083,8 +2082,6 @@
 extern int invalidate_inode_pages2(struct address_space *mapping);
 extern int invalidate_inode_pages2_range(struct address_space *mapping,
 					 pgoff_t start, pgoff_t end);
-extern void generic_sync_sb_inodes(struct super_block *sb,
-				struct writeback_control *wbc);
 extern int write_inode_now(struct inode *, int);
 extern int filemap_fdatawrite(struct address_space *);
 extern int filemap_flush(struct address_space *);
@@ -2199,7 +2196,6 @@
 extern int set_blocksize(struct block_device *, int);
 extern int sb_set_blocksize(struct super_block *, int);
 extern int sb_min_blocksize(struct super_block *, int);
-extern int sb_has_dirty_inodes(struct super_block *);
 
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
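
The new ->check_acl() operation gives the generic permission code a way to
consult the filesystem's POSIX ACLs without each filesystem wrapping
->permission() wholesale. A hedged sketch of wiring it up, using the shmem
helper renamed later in this patch (the ops table itself is hypothetical):

	static const struct inode_operations example_inode_ops = {
		/* ... */
		.check_acl	= shmem_check_acl,	/* int (*)(struct inode *, int) */
	};
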
diff --git a/include/linux/key.h b/include/linux/key.h
index e544f46..cd50dfa 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -129,7 +129,10 @@
 	struct rw_semaphore	sem;		/* change vs change sem */
 	struct key_user		*user;		/* owner of this key */
 	void			*security;	/* security data for this key */
-	time_t			expiry;		/* time at which key expires (or 0) */
+	union {
+		time_t		expiry;		/* time at which key expires (or 0) */
+		time_t		revoked_at;	/* time at which key was revoked */
+	};
 	uid_t			uid;
 	gid_t			gid;
 	key_perm_t		perm;		/* access permissions */
@@ -275,6 +278,8 @@
 extern ctl_table key_sysctls[];
 #endif
 
+extern void key_replace_session_keyring(void);
+
 /*
  * the userspace interface
  */
@@ -297,6 +302,7 @@
 #define key_fsuid_changed(t)		do { } while(0)
 #define key_fsgid_changed(t)		do { } while(0)
 #define key_init()			do { } while(0)
+#define key_replace_session_keyring()	do { } while(0)
 
 #endif /* CONFIG_KEYS */
 #endif /* __KERNEL__ */
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
index c0688eb..bd383f1 100644
--- a/include/linux/keyctl.h
+++ b/include/linux/keyctl.h
@@ -52,5 +52,6 @@
 #define KEYCTL_SET_TIMEOUT		15	/* set key timeout */
 #define KEYCTL_ASSUME_AUTHORITY		16	/* assume request_key() authorisation */
 #define KEYCTL_GET_SECURITY		17	/* get key security label */
+#define KEYCTL_SESSION_TO_PARENT	18	/* apply session keyring to parent process */
 
 #endif /*  _LINUX_KEYCTL_H */
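
From userspace the new command goes through the keyctl() multiplexer. A
minimal sketch using a raw syscall, since a libkeyutils of this vintage may
not know the constant yet:

	#include <sys/syscall.h>
	#include <unistd.h>

	/* Sketch: ask the kernel to install our session keyring on our parent. */
	long ret = syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT);
	/* returns 0 on success, -1 with errno set on failure */
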
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 47b39b7..dc2fd54 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -34,6 +34,8 @@
 int kmemcheck_show_addr(unsigned long address);
 int kmemcheck_hide_addr(unsigned long address);
 
+bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
+
 #else
 #define kmemcheck_enabled 0
 
@@ -99,6 +101,11 @@
 {
 }
 
+static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
+{
+	return true;
+}
+
 #endif /* CONFIG_KMEMCHECK */
 
 /*
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 6a63807..3c7497d 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -23,18 +23,18 @@
 
 #ifdef CONFIG_DEBUG_KMEMLEAK
 
-extern void kmemleak_init(void);
+extern void kmemleak_init(void) __ref;
 extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
-			   gfp_t gfp);
-extern void kmemleak_free(const void *ptr);
-extern void kmemleak_free_part(const void *ptr, size_t size);
+			   gfp_t gfp) __ref;
+extern void kmemleak_free(const void *ptr) __ref;
+extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
 extern void kmemleak_padding(const void *ptr, unsigned long offset,
-			     size_t size);
-extern void kmemleak_not_leak(const void *ptr);
-extern void kmemleak_ignore(const void *ptr);
+			     size_t size) __ref;
+extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_ignore(const void *ptr) __ref;
 extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
-			       size_t length, gfp_t gfp);
-extern void kmemleak_no_scan(const void *ptr);
+			       size_t length, gfp_t gfp) __ref;
+extern void kmemleak_no_scan(const void *ptr) __ref;
 
 static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
 					    int min_count, unsigned long flags,
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index e461b2c..190c378 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -33,6 +33,7 @@
 #define LSM_AUDIT_DATA_IPC     4
 #define LSM_AUDIT_DATA_TASK    5
 #define LSM_AUDIT_DATA_KEY     6
+#define LSM_AUDIT_NO_AUDIT     7
 	struct task_struct *tsk;
 	union 	{
 		struct {
@@ -66,16 +67,19 @@
 		} key_struct;
 #endif
 	} u;
-	const char *function;
 	/* this union contains LSM specific data */
 	union {
+#ifdef CONFIG_SECURITY_SMACK
 		/* SMACK data */
 		struct smack_audit_data {
+			const char *function;
 			char *subject;
 			char *object;
 			char *request;
 			int result;
 		} smack_audit_data;
+#endif
+#ifdef CONFIG_SECURITY_SELINUX
 		/* SELinux data */
 		struct {
 			u32 ssid;
@@ -83,10 +87,12 @@
 			u16 tclass;
 			u32 requested;
 			u32 audited;
+			u32 denied;
 			struct av_decision *avd;
 			int result;
 		} selinux_audit_data;
-	} lsm_priv;
+#endif
+	};
 	/* these callback will be implemented by a specific LSM */
 	void (*lsm_pre_audit)(struct audit_buffer *, void *);
 	void (*lsm_post_audit)(struct audit_buffer *, void *);
@@ -104,7 +110,7 @@
 /* Initialize an LSM audit data structure. */
 #define COMMON_AUDIT_DATA_INIT(_d, _t) \
 	{ memset((_d), 0, sizeof(struct common_audit_data)); \
-	 (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; }
+	 (_d)->type = LSM_AUDIT_DATA_##_t; }
 
 void common_lsm_audit(struct common_audit_data *a);
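
With the function name moved into the Smack-private data,
COMMON_AUDIT_DATA_INIT() now only zeroes the structure and sets its type. A
short sketch of a call site using the fields visible above (the surrounding
code is hypothetical):

	struct common_audit_data ad;

	COMMON_AUDIT_DATA_INIT(&ad, TASK);	/* type = LSM_AUDIT_DATA_TASK */
	ad.tsk = current;
	common_lsm_audit(&ad);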
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f1ea4a..9304027 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1292,6 +1292,7 @@
 	struct mutex cred_guard_mutex;	/* guard against foreign influences on
 					 * credential calculations
 					 * (notably. ptrace) */
+	struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
 
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
@@ -2077,7 +2078,7 @@
 #define for_each_process(p) \
 	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
 
-extern bool is_single_threaded(struct task_struct *);
+extern bool current_is_single_threaded(void);
 
 /*
  * Careful: do_each_thread/while_each_thread is a double loop so
diff --git a/include/linux/security.h b/include/linux/security.h
index 1f16eea..d050b66 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,7 +53,7 @@
 extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
 		       int cap, int audit);
 extern int cap_settime(struct timespec *ts, struct timezone *tz);
-extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
+extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
 extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
 extern int cap_capset(struct cred *new, const struct cred *old,
@@ -653,6 +653,11 @@
  *	manual page for definitions of the @clone_flags.
  *	@clone_flags contains the flags indicating what should be shared.
  *	Return 0 if permission is granted.
+ * @cred_alloc_blank:
+ *	@cred points to the credentials.
+ *	@gfp indicates the atomicity of any memory allocations.
+ *	Only allocate sufficient memory and attach to @cred such that
+ *	cred_transfer() will not get ENOMEM.
  * @cred_free:
  *	@cred points to the credentials.
  *	Deallocate and clear the cred->security field in a set of credentials.
@@ -665,6 +670,10 @@
  *	@new points to the new credentials.
  *	@old points to the original credentials.
  *	Install a new set of credentials.
+ * @cred_transfer:
+ *	@new points to the new credentials.
+ *	@old points to the original credentials.
+ *	Transfer data from the original creds to the new creds.
  * @kernel_act_as:
  *	Set the credentials for a kernel service to act as (subjective context).
  *	@new points to the credentials to be modified.
@@ -678,6 +687,10 @@
  *	@inode points to the inode to use as a reference.
  *	The current task must be the one that nominated @inode.
  *	Return 0 if successful.
+ * @kernel_module_request:
+ *	Check permission before the kernel triggers an automatic upcall to
+ *	userspace to load a kernel module with the given name.
+ *	Return 0 if successful.
  * @task_setuid:
  *	Check permission before setting one or more of the user identity
  *	attributes of the current process.  The @flags parameter indicates
@@ -994,6 +1007,17 @@
  *	Sets the connection's peersid to the secmark on skb.
  * @req_classify_flow:
  *	Sets the flow's sid to the openreq sid.
+ * @tun_dev_create:
+ *	Check permissions prior to creating a new TUN device.
+ * @tun_dev_post_create:
+ *	This hook allows a module to update or allocate a per-socket security
+ *	structure.
+ *	@sk contains the newly created sock structure.
+ * @tun_dev_attach:
+ *	Check permissions prior to attaching to a persistent TUN device.  This
+ *	hook can also be used by the module to update any security state
+ *	associated with the TUN device's sock structure.
+ *	@sk contains the existing sock structure.
  *
  * Security hooks for XFRM operations.
  *
@@ -1088,6 +1112,13 @@
  *	Return the length of the string (including terminating NUL) or -ve if
  *      an error.
  *	May also return 0 (and a NULL buffer pointer) if there is no label.
+ * @key_session_to_parent:
+ *	Forcibly assign the session keyring from a process to its parent
+ *	process.
+ *	@cred: Pointer to process's credentials
+ *	@parent_cred: Pointer to parent process's credentials
+ *	@keyring: Proposed new session keyring
+ *	Return 0 if permission is granted, -ve error otherwise.
  *
  * Security hooks affecting all System V IPC operations.
  *
@@ -1229,7 +1260,7 @@
  *	@alter contains the flag indicating whether changes are to be made.
  *	Return 0 if permission is granted.
  *
- * @ptrace_may_access:
+ * @ptrace_access_check:
  *	Check permission before allowing the current process to trace the
  *	@child process.
  *	Security modules may also want to perform a process tracing check
@@ -1244,7 +1275,7 @@
  *	Check that the @parent process has sufficient permission to trace the
  *	current process before allowing the current process to present itself
  *	to the @parent process for tracing.
- *	The parent process will still have to undergo the ptrace_may_access
+ *	The parent process will still have to undergo the ptrace_access_check
  *	checks before it is allowed to trace this one.
  *	@parent contains the task_struct structure for debugger process.
  *	Return 0 if permission is granted.
@@ -1351,12 +1382,47 @@
  *	audit_rule_init.
  *	@rule contains the allocated rule
  *
+ * @inode_notifysecctx:
+ *	Notify the security module of what the security context of an inode
+ *	should be.  Initializes the incore security context managed by the
+ *	security module for this inode.  Example usage:  NFS client invokes
+ *	this hook to initialize the security context in its incore inode to the
+ *	value provided by the server for the file when the server returned the
+ *	file's attributes to the client.
+ *
+ * 	Must be called with inode->i_mutex locked.
+ *
+ * 	@inode we wish to set the security context of.
+ * 	@ctx contains the string which we wish to set in the inode.
+ * 	@ctxlen contains the length of @ctx.
+ *
+ * @inode_setsecctx:
+ * 	Change the security context of an inode.  Updates the
+ * 	incore security context managed by the security module and invokes the
+ * 	fs code as needed (via __vfs_setxattr_noperm) to update any backing
+ * 	xattrs that represent the context.  Example usage:  NFS server invokes
+ * 	this hook to change the security context in its incore inode and on the
+ * 	backing filesystem to a value provided by the client on a SETATTR
+ * 	operation.
+ *
+ * 	Must be called with inode->i_mutex locked.
+ *
+ * 	@dentry contains the inode we wish to set the security context of.
+ * 	@ctx contains the string which we wish to set in the inode.
+ * 	@ctxlen contains the length of @ctx.
+ *
+ * @inode_getsecctx:
+ * 	Returns a string containing all relevant security context information.
+ *
+ * 	@inode we wish to get the security context of.
+ *	@ctx is a pointer in which to place the allocated security context.
+ *	@ctxlen points to the place to put the length of @ctx.
+ *
  * This is the main security structure.
  */
 struct security_operations {
 	char name[SECURITY_NAME_MAX + 1];
 
-	int (*ptrace_may_access) (struct task_struct *child, unsigned int mode);
+	int (*ptrace_access_check) (struct task_struct *child, unsigned int mode);
 	int (*ptrace_traceme) (struct task_struct *parent);
 	int (*capget) (struct task_struct *target,
 		       kernel_cap_t *effective,
@@ -1483,12 +1549,15 @@
 	int (*dentry_open) (struct file *file, const struct cred *cred);
 
 	int (*task_create) (unsigned long clone_flags);
+	int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp);
 	void (*cred_free) (struct cred *cred);
 	int (*cred_prepare)(struct cred *new, const struct cred *old,
 			    gfp_t gfp);
 	void (*cred_commit)(struct cred *new, const struct cred *old);
+	void (*cred_transfer)(struct cred *new, const struct cred *old);
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
+	int (*kernel_module_request)(void);
 	int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
@@ -1556,6 +1625,10 @@
 	int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid);
 	void (*release_secctx) (char *secdata, u32 seclen);
 
+	int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen);
+	int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen);
+	int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
+
 #ifdef CONFIG_SECURITY_NETWORK
 	int (*unix_stream_connect) (struct socket *sock,
 				    struct socket *other, struct sock *newsk);
@@ -1592,6 +1665,9 @@
 	void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req);
 	void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb);
 	void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
+	int (*tun_dev_create)(void);
+	void (*tun_dev_post_create)(struct sock *sk);
+	int (*tun_dev_attach)(struct sock *sk);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1620,6 +1696,9 @@
 			       const struct cred *cred,
 			       key_perm_t perm);
 	int (*key_getsecurity)(struct key *key, char **_buffer);
+	int (*key_session_to_parent)(const struct cred *cred,
+				     const struct cred *parent_cred,
+				     struct key *key);
 #endif	/* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
@@ -1637,7 +1716,7 @@
 extern int register_security(struct security_operations *ops);
 
 /* Security operations */
-int security_ptrace_may_access(struct task_struct *child, unsigned int mode);
+int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
 int security_ptrace_traceme(struct task_struct *parent);
 int security_capget(struct task_struct *target,
 		    kernel_cap_t *effective,
@@ -1736,11 +1815,14 @@
 int security_file_receive(struct file *file);
 int security_dentry_open(struct file *file, const struct cred *cred);
 int security_task_create(unsigned long clone_flags);
+int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
 void security_cred_free(struct cred *cred);
 int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
 void security_commit_creds(struct cred *new, const struct cred *old);
+void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
+int security_kernel_module_request(void);
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
@@ -1796,6 +1878,9 @@
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
 
+int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
+int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
+int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
 #else /* CONFIG_SECURITY */
 struct security_mnt_opts {
 };
@@ -1818,10 +1903,10 @@
 	return 0;
 }
 
-static inline int security_ptrace_may_access(struct task_struct *child,
+static inline int security_ptrace_access_check(struct task_struct *child,
 					     unsigned int mode)
 {
-	return cap_ptrace_may_access(child, mode);
+	return cap_ptrace_access_check(child, mode);
 }
 
 static inline int security_ptrace_traceme(struct task_struct *parent)
@@ -2266,6 +2351,11 @@
 	return 0;
 }
 
+static inline int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+	return 0;
+}
+
 static inline void security_cred_free(struct cred *cred)
 { }
 
@@ -2281,6 +2371,11 @@
 {
 }
 
+static inline void security_transfer_creds(struct cred *new,
+					   const struct cred *old)
+{
+}
+
 static inline int security_kernel_act_as(struct cred *cred, u32 secid)
 {
 	return 0;
@@ -2292,6 +2387,11 @@
 	return 0;
 }
 
+static inline int security_kernel_module_request(void)
+{
+	return 0;
+}
+
 static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
 				       int flags)
 {
@@ -2537,6 +2637,19 @@
 static inline void security_release_secctx(char *secdata, u32 seclen)
 {
 }
+
+static inline int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+	return -EOPNOTSUPP;
+}
+static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+	return -EOPNOTSUPP;
+}
+static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+	return -EOPNOTSUPP;
+}
 #endif	/* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
@@ -2575,6 +2688,9 @@
 			const struct request_sock *req);
 void security_inet_conn_established(struct sock *sk,
 			struct sk_buff *skb);
+int security_tun_dev_create(void);
+void security_tun_dev_post_create(struct sock *sk);
+int security_tun_dev_attach(struct sock *sk);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct socket *sock,
@@ -2725,6 +2841,20 @@
 			struct sk_buff *skb)
 {
 }
+
+static inline int security_tun_dev_create(void)
+{
+	return 0;
+}
+
+static inline void security_tun_dev_post_create(struct sock *sk)
+{
+}
+
+static inline int security_tun_dev_attach(struct sock *sk)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -2881,6 +3011,9 @@
 int security_key_permission(key_ref_t key_ref,
 			    const struct cred *cred, key_perm_t perm);
 int security_key_getsecurity(struct key *key, char **_buffer);
+int security_key_session_to_parent(const struct cred *cred,
+				   const struct cred *parent_cred,
+				   struct key *key);
 
 #else
 
@@ -2908,6 +3041,13 @@
 	return 0;
 }
 
+static inline int security_key_session_to_parent(const struct cred *cred,
+						 const struct cred *parent_cred,
+						 struct key *key)
+{
+	return 0;
+}
+
 #endif
 #endif /* CONFIG_KEYS */
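
A security module opts into the new hooks by filling the matching
security_operations slots; slots it leaves NULL are fixed up to the
default/capability behaviour at registration time. A hedged sketch (all
example_* implementations are hypothetical):

	static struct security_operations example_ops = {
		.name			= "example",
		.cred_alloc_blank	= example_cred_alloc_blank,
		.cred_transfer		= example_cred_transfer,
		.kernel_module_request	= example_kernel_module_request,
	#ifdef CONFIG_SECURITY_NETWORK
		.tun_dev_create		= example_tun_dev_create,
	#endif
	};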
 
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index abff6c9..6d3f2f4 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -39,7 +39,7 @@
 }
 
 #ifdef CONFIG_TMPFS_POSIX_ACL
-int shmem_permission(struct inode *, int);
+int shmem_check_acl(struct inode *, int);
 int shmem_acl_init(struct inode *, struct inode *);
 
 extern struct xattr_handler shmem_xattr_acl_access_handler;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3224820..78b1e46 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -14,17 +14,6 @@
 extern struct list_head inode_unused;
 
 /*
- * Yes, writeback.h requires sched.h
- * No, sched.h is not included from here.
- */
-static inline int task_is_pdflush(struct task_struct *task)
-{
-	return task->flags & PF_FLUSHER;
-}
-
-#define current_is_pdflush()	task_is_pdflush(current)
-
-/*
  * fs/fs-writeback.c
  */
 enum writeback_sync_modes {
@@ -40,6 +29,8 @@
 struct writeback_control {
 	struct backing_dev_info *bdi;	/* If !NULL, only write back this
 					   queue */
+	struct super_block *sb;		/* if !NULL, only write inodes from
+					   this super_block */
 	enum writeback_sync_modes sync_mode;
 	unsigned long *older_than_this;	/* If !NULL, only write back inodes
 					   older than this */
@@ -76,9 +67,13 @@
 /*
  * fs/fs-writeback.c
  */	
-void writeback_inodes(struct writeback_control *wbc);
+struct bdi_writeback;
 int inode_wait(void *);
-void sync_inodes_sb(struct super_block *, int wait);
+long writeback_inodes_sb(struct super_block *);
+long sync_inodes_sb(struct super_block *);
+void writeback_inodes_wbc(struct writeback_control *wbc);
+long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
+void wakeup_flusher_threads(long nr_pages);
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
@@ -98,7 +93,6 @@
 /*
  * mm/page-writeback.c
  */
-int wakeup_pdflush(long nr_pages);
 void laptop_io_completion(void);
 void laptop_sync_completion(void);
 void throttle_vm_writeout(gfp_t gfp_mask);
@@ -150,7 +144,6 @@
 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
 				void *data);
 
-int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
 int generic_writepages(struct address_space *mapping,
 		       struct writeback_control *wbc);
 int write_cache_pages(struct address_space *mapping,
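
With pdflush gone, per-superblock writeback goes through the two new entry
points: writeback_inodes_sb() kicks off WB_SYNC_NONE writeback and returns,
while sync_inodes_sb() performs a data-integrity (WB_SYNC_ALL) pass. A minimal
sketch of a caller (the function is hypothetical):

	/* Sketch: flush a superblock, optionally waiting for completion. */
	static long example_sync_sb(struct super_block *sb, int wait)
	{
		if (wait)
			return sync_inodes_sb(sb);	/* data-integrity sync */
		return writeback_inodes_sb(sb);		/* background writeback */
	}
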
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d131e35..5c84af8 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -49,6 +49,7 @@
 ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
 ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
 ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
+int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
 int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
 int vfs_removexattr(struct dentry *, const char *);
 
diff --git a/kernel/acct.c b/kernel/acct.c
index 9f33910..9a4715a 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -491,13 +491,17 @@
 	u64 run_time;
 	struct timespec uptime;
 	struct tty_struct *tty;
+	const struct cred *orig_cred;
+
+	/* Perform file operations on behalf of whoever enabled accounting */
+	orig_cred = override_creds(file->f_cred);
 
 	/*
 	 * First check to see if there is enough free_space to continue
 	 * the process accounting system.
 	 */
 	if (!check_free_space(acct, file))
-		return;
+		goto out;
 
 	/*
 	 * Fill the accounting struct with the needed info as recorded
@@ -578,6 +582,8 @@
 			       sizeof(acct_t), &file->f_pos);
 	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
 	set_fs(fs);
+out:
+	revert_creds(orig_cred);
 }
 
 /**
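
The override_creds()/revert_creds() pairing used above is the standard pattern
for temporarily assuming another subjective security context; kernel/cred.c
below teaches both functions the new debug accounting. Condensed:

	const struct cred *old;

	old = override_creds(file->f_cred);	/* act as the file's opener */
	/* ... perform the privileged I/O ... */
	revert_creds(old);			/* always restore on every path */
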
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b6eadfe3..c7ece8f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -600,6 +600,7 @@
 static struct file_operations proc_cgroupstats_operations;
 
 static struct backing_dev_info cgroup_backing_dev_info = {
+	.name		= "cgroup",
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
diff --git a/kernel/cred.c b/kernel/cred.c
index 1bb4d7e..006fcab 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -18,6 +18,18 @@
 #include <linux/cn_proc.h>
 #include "cred-internals.h"
 
+#if 0
+#define kdebug(FMT, ...) \
+	printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
+#else
+static inline __attribute__((format(printf, 1, 2)))
+void no_printk(const char *fmt, ...)
+{
+}
+#define kdebug(FMT, ...) \
+	no_printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
+#endif
+
 static struct kmem_cache *cred_jar;
 
 /*
@@ -36,6 +48,10 @@
  */
 struct cred init_cred = {
 	.usage			= ATOMIC_INIT(4),
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	.subscribers		= ATOMIC_INIT(2),
+	.magic			= CRED_MAGIC,
+#endif
 	.securebits		= SECUREBITS_DEFAULT,
 	.cap_inheritable	= CAP_INIT_INH_SET,
 	.cap_permitted		= CAP_FULL_SET,
@@ -48,6 +64,31 @@
 #endif
 };
 
+static inline void set_cred_subscribers(struct cred *cred, int n)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	atomic_set(&cred->subscribers, n);
+#endif
+}
+
+static inline int read_cred_subscribers(const struct cred *cred)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	return atomic_read(&cred->subscribers);
+#else
+	return 0;
+#endif
+}
+
+static inline void alter_cred_subscribers(const struct cred *_cred, int n)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	struct cred *cred = (struct cred *) _cred;
+
+	atomic_add(n, &cred->subscribers);
+#endif
+}
+
 /*
  * Dispose of the shared task group credentials
  */
@@ -85,9 +126,22 @@
 {
 	struct cred *cred = container_of(rcu, struct cred, rcu);
 
+	kdebug("put_cred_rcu(%p)", cred);
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	if (cred->magic != CRED_MAGIC_DEAD ||
+	    atomic_read(&cred->usage) != 0 ||
+	    read_cred_subscribers(cred) != 0)
+		panic("CRED: put_cred_rcu() sees %p with"
+		      " mag %x, put %p, usage %d, subscr %d\n",
+		      cred, cred->magic, cred->put_addr,
+		      atomic_read(&cred->usage),
+		      read_cred_subscribers(cred));
+#else
 	if (atomic_read(&cred->usage) != 0)
 		panic("CRED: put_cred_rcu() sees %p with usage %d\n",
 		      cred, atomic_read(&cred->usage));
+#endif
 
 	security_cred_free(cred);
 	key_put(cred->thread_keyring);
@@ -106,12 +160,90 @@
  */
 void __put_cred(struct cred *cred)
 {
+	kdebug("__put_cred(%p{%d,%d})", cred,
+	       atomic_read(&cred->usage),
+	       read_cred_subscribers(cred));
+
 	BUG_ON(atomic_read(&cred->usage) != 0);
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	BUG_ON(read_cred_subscribers(cred) != 0);
+	cred->magic = CRED_MAGIC_DEAD;
+	cred->put_addr = __builtin_return_address(0);
+#endif
+	BUG_ON(cred == current->cred);
+	BUG_ON(cred == current->real_cred);
 
 	call_rcu(&cred->rcu, put_cred_rcu);
 }
 EXPORT_SYMBOL(__put_cred);
 
+/*
+ * Clean up a task's credentials when it exits
+ */
+void exit_creds(struct task_struct *tsk)
+{
+	struct cred *cred;
+
+	kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred,
+	       atomic_read(&tsk->cred->usage),
+	       read_cred_subscribers(tsk->cred));
+
+	cred = (struct cred *) tsk->real_cred;
+	tsk->real_cred = NULL;
+	validate_creds(cred);
+	alter_cred_subscribers(cred, -1);
+	put_cred(cred);
+
+	cred = (struct cred *) tsk->cred;
+	tsk->cred = NULL;
+	validate_creds(cred);
+	alter_cred_subscribers(cred, -1);
+	put_cred(cred);
+
+	cred = (struct cred *) tsk->replacement_session_keyring;
+	if (cred) {
+		tsk->replacement_session_keyring = NULL;
+		validate_creds(cred);
+		put_cred(cred);
+	}
+}
+
+/*
+ * Allocate blank credentials, such that the credentials can be filled in at a
+ * later date without risk of ENOMEM.
+ */
+struct cred *cred_alloc_blank(void)
+{
+	struct cred *new;
+
+	new = kmem_cache_zalloc(cred_jar, GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+#ifdef CONFIG_KEYS
+	new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
+	if (!new->tgcred) {
+		kfree(new);
+		return NULL;
+	}
+	atomic_set(&new->tgcred->usage, 1);
+#endif
+
+	atomic_set(&new->usage, 1);
+
+	if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
+		goto error;
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	new->magic = CRED_MAGIC;
+#endif
+	return new;
+
+error:
+	abort_creds(new);
+	return NULL;
+}
+
 /**
  * prepare_creds - Prepare a new set of credentials for modification
  *
@@ -132,16 +264,19 @@
 	const struct cred *old;
 	struct cred *new;
 
-	BUG_ON(atomic_read(&task->real_cred->usage) < 1);
+	validate_process_creds();
 
 	new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
 	if (!new)
 		return NULL;
 
+	kdebug("prepare_creds() alloc %p", new);
+
 	old = task->cred;
 	memcpy(new, old, sizeof(struct cred));
 
 	atomic_set(&new->usage, 1);
+	set_cred_subscribers(new, 0);
 	get_group_info(new->group_info);
 	get_uid(new->user);
 
@@ -157,6 +292,7 @@
 
 	if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
 		goto error;
+	validate_creds(new);
 	return new;
 
 error:
@@ -229,9 +365,12 @@
 	if (!new)
 		return NULL;
 
+	kdebug("prepare_usermodehelper_creds() alloc %p", new);
+
 	memcpy(new, &init_cred, sizeof(struct cred));
 
 	atomic_set(&new->usage, 1);
+	set_cred_subscribers(new, 0);
 	get_group_info(new->group_info);
 	get_uid(new->user);
 
@@ -250,6 +389,7 @@
 #endif
 	if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
 		goto error;
+	validate_creds(new);
 
 	BUG_ON(atomic_read(&new->usage) != 1);
 	return new;
@@ -286,6 +426,10 @@
 	    ) {
 		p->real_cred = get_cred(p->cred);
 		get_cred(p->cred);
+		alter_cred_subscribers(p->cred, 2);
+		kdebug("share_creds(%p{%d,%d})",
+		       p->cred, atomic_read(&p->cred->usage),
+		       read_cred_subscribers(p->cred));
 		atomic_inc(&p->cred->user->processes);
 		return 0;
 	}
@@ -331,6 +475,8 @@
 
 	atomic_inc(&new->user->processes);
 	p->cred = p->real_cred = get_cred(new);
+	alter_cred_subscribers(new, 2);
+	validate_creds(new);
 	return 0;
 
 error_put:
@@ -355,13 +501,20 @@
 int commit_creds(struct cred *new)
 {
 	struct task_struct *task = current;
-	const struct cred *old;
+	const struct cred *old = task->real_cred;
 
-	BUG_ON(task->cred != task->real_cred);
-	BUG_ON(atomic_read(&task->real_cred->usage) < 2);
+	kdebug("commit_creds(%p{%d,%d})", new,
+	       atomic_read(&new->usage),
+	       read_cred_subscribers(new));
+
+	BUG_ON(task->cred != old);
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	BUG_ON(read_cred_subscribers(old) < 2);
+	validate_creds(old);
+	validate_creds(new);
+#endif
 	BUG_ON(atomic_read(&new->usage) < 1);
 
-	old = task->real_cred;
 	security_commit_creds(new, old);
 
 	get_cred(new); /* we will require a ref for the subj creds too */
@@ -390,12 +543,14 @@
 	 *   cheaply with the new uid cache, so if it matters
 	 *   we should be checking for it.  -DaveM
 	 */
+	alter_cred_subscribers(new, 2);
 	if (new->user != old->user)
 		atomic_inc(&new->user->processes);
 	rcu_assign_pointer(task->real_cred, new);
 	rcu_assign_pointer(task->cred, new);
 	if (new->user != old->user)
 		atomic_dec(&old->user->processes);
+	alter_cred_subscribers(old, -2);
 
 	sched_switch_user(task);
 
@@ -428,6 +583,13 @@
  */
 void abort_creds(struct cred *new)
 {
+	kdebug("abort_creds(%p{%d,%d})", new,
+	       atomic_read(&new->usage),
+	       read_cred_subscribers(new));
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+	BUG_ON(read_cred_subscribers(new) != 0);
+#endif
 	BUG_ON(atomic_read(&new->usage) < 1);
 	put_cred(new);
 }
@@ -444,7 +606,20 @@
 {
 	const struct cred *old = current->cred;
 
-	rcu_assign_pointer(current->cred, get_cred(new));
+	kdebug("override_creds(%p{%d,%d})", new,
+	       atomic_read(&new->usage),
+	       read_cred_subscribers(new));
+
+	validate_creds(old);
+	validate_creds(new);
+	get_cred(new);
+	alter_cred_subscribers(new, 1);
+	rcu_assign_pointer(current->cred, new);
+	alter_cred_subscribers(old, -1);
+
+	kdebug("override_creds() = %p{%d,%d}", old,
+	       atomic_read(&old->usage),
+	       read_cred_subscribers(old));
 	return old;
 }
 EXPORT_SYMBOL(override_creds);
@@ -460,7 +635,15 @@
 {
 	const struct cred *override = current->cred;
 
+	kdebug("revert_creds(%p{%d,%d})", old,
+	       atomic_read(&old->usage),
+	       read_cred_subscribers(old));
+
+	validate_creds(old);
+	validate_creds(override);
+	alter_cred_subscribers(old, 1);
 	rcu_assign_pointer(current->cred, old);
+	alter_cred_subscribers(override, -1);
 	put_cred(override);
 }
 EXPORT_SYMBOL(revert_creds);
@@ -502,11 +685,15 @@
 	if (!new)
 		return NULL;
 
+	kdebug("prepare_kernel_cred() alloc %p", new);
+
 	if (daemon)
 		old = get_task_cred(daemon);
 	else
 		old = get_cred(&init_cred);
 
+	validate_creds(old);
+
 	*new = *old;
 	get_uid(new->user);
 	get_group_info(new->group_info);
@@ -526,7 +713,9 @@
 		goto error;
 
 	atomic_set(&new->usage, 1);
+	set_cred_subscribers(new, 0);
 	put_cred(old);
+	validate_creds(new);
 	return new;
 
 error:
@@ -589,3 +778,95 @@
 	return security_kernel_create_files_as(new, inode);
 }
 EXPORT_SYMBOL(set_create_files_as);
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+
+/*
+ * dump invalid credentials
+ */
+static void dump_invalid_creds(const struct cred *cred, const char *label,
+			       const struct task_struct *tsk)
+{
+	printk(KERN_ERR "CRED: %s credentials: %p %s%s%s\n",
+	       label, cred,
+	       cred == &init_cred ? "[init]" : "",
+	       cred == tsk->real_cred ? "[real]" : "",
+	       cred == tsk->cred ? "[eff]" : "");
+	printk(KERN_ERR "CRED: ->magic=%x, put_addr=%p\n",
+	       cred->magic, cred->put_addr);
+	printk(KERN_ERR "CRED: ->usage=%d, subscr=%d\n",
+	       atomic_read(&cred->usage),
+	       read_cred_subscribers(cred));
+	printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n",
+	       cred->uid, cred->euid, cred->suid, cred->fsuid);
+	printk(KERN_ERR "CRED: ->*gid = { %d,%d,%d,%d }\n",
+	       cred->gid, cred->egid, cred->sgid, cred->fsgid);
+#ifdef CONFIG_SECURITY
+	printk(KERN_ERR "CRED: ->security is %p\n", cred->security);
+	if ((unsigned long) cred->security >= PAGE_SIZE &&
+	    (((unsigned long) cred->security & 0xffffff00) !=
+	     (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)))
+		printk(KERN_ERR "CRED: ->security {%x, %x}\n",
+		       ((u32*)cred->security)[0],
+		       ((u32*)cred->security)[1]);
+#endif
+}
+
+/*
+ * report use of invalid credentials
+ */
+void __invalid_creds(const struct cred *cred, const char *file, unsigned line)
+{
+	printk(KERN_ERR "CRED: Invalid credentials\n");
+	printk(KERN_ERR "CRED: At %s:%u\n", file, line);
+	dump_invalid_creds(cred, "Specified", current);
+	BUG();
+}
+EXPORT_SYMBOL(__invalid_creds);
+
+/*
+ * check the credentials on a process
+ */
+void __validate_process_creds(struct task_struct *tsk,
+			      const char *file, unsigned line)
+{
+	if (tsk->cred == tsk->real_cred) {
+		if (unlikely(read_cred_subscribers(tsk->cred) < 2 ||
+			     creds_are_invalid(tsk->cred)))
+			goto invalid_creds;
+	} else {
+		if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 ||
+			     read_cred_subscribers(tsk->cred) < 1 ||
+			     creds_are_invalid(tsk->real_cred) ||
+			     creds_are_invalid(tsk->cred)))
+			goto invalid_creds;
+	}
+	return;
+
+invalid_creds:
+	printk(KERN_ERR "CRED: Invalid process credentials\n");
+	printk(KERN_ERR "CRED: At %s:%u\n", file, line);
+
+	dump_invalid_creds(tsk->real_cred, "Real", tsk);
+	if (tsk->cred != tsk->real_cred)
+		dump_invalid_creds(tsk->cred, "Effective", tsk);
+	else
+		printk(KERN_ERR "CRED: Effective creds == Real creds\n");
+	BUG();
+}
+EXPORT_SYMBOL(__validate_process_creds);
+
+/*
+ * check creds for do_exit()
+ */
+void validate_creds_for_do_exit(struct task_struct *tsk)
+{
+	kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})",
+	       tsk->real_cred, tsk->cred,
+	       atomic_read(&tsk->cred->usage),
+	       read_cred_subscribers(tsk->cred));
+
+	__validate_process_creds(tsk, __FILE__, __LINE__);
+}
+
+#endif /* CONFIG_DEBUG_CREDENTIALS */
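
The invariant the debug code enforces is that a cred's subscriber count
(task_struct pointers) never exceeds its usage count, since every subscribed
task also holds a reference. A condensed sketch of the common lifecycle,
matching the accounting above:

	struct cred *new;

	new = prepare_creds();		/* usage = 1, subscribers = 0 */
	if (!new)
		return -ENOMEM;
	new->fsuid = 0;			/* hypothetical modification */
	return commit_creds(new);	/* installs on ->cred and ->real_cred,
					 * so subscribers += 2 */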
diff --git a/kernel/exit.c b/kernel/exit.c
index 869dc22..c98ff7a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -901,6 +901,8 @@
 
 	tracehook_report_exit(&code);
 
+	validate_creds_for_do_exit(tsk);
+
 	/*
 	 * We're taking recursive faults here in do_exit. Safest is to just
 	 * leave this task alone and wait for reboot.
@@ -1009,6 +1011,8 @@
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	validate_creds_for_do_exit(tsk);
+
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
diff --git a/kernel/fork.c b/kernel/fork.c
index e6c04d4..aab8579 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -152,8 +152,7 @@
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
-	put_cred(tsk->real_cred);
-	put_cred(tsk->cred);
+	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 
 	if (!profile_handoff_task(tsk))
@@ -1297,8 +1296,7 @@
 	module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
-	put_cred(p->real_cred);
-	put_cred(p->cred);
+	exit_creds(p);
 bad_fork_free:
 	free_task(p);
 fork_out:
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 385c31a..4e8cae2 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -78,6 +78,10 @@
 #define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
+	ret = security_kernel_module_request();
+	if (ret)
+		return ret;
+
 	va_start(args, fmt);
 	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
 	va_end(args);
@@ -462,6 +466,7 @@
 	int retval = 0;
 
 	BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+	validate_creds(sub_info->cred);
 
 	helper_lock();
 	if (sub_info->path[0] == '\0')
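
request_module() now consults the LSM before building the usermodehelper call,
so a security module can veto automatic module loading outright. A hedged
sketch of a trivial hook (the policy predicate is hypothetical):

	static int example_kernel_module_request(void)
	{
		if (!example_autoload_allowed())	/* hypothetical policy check */
			return -EPERM;	/* propagated back from request_module() */
		return 0;
	}
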
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 082c320..307c285 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -152,7 +152,7 @@
 	if (!dumpable && !capable(CAP_SYS_PTRACE))
 		return -EPERM;
 
-	return security_ptrace_may_access(task, mode);
+	return security_ptrace_access_check(task, mode);
 }
 
 bool ptrace_may_access(struct task_struct *task, unsigned int mode)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 58be760..71d8dc7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -49,7 +49,6 @@
 #include <linux/acpi.h>
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
-#include <linux/security.h>
 #include <linux/slow-work.h>
 #include <linux/perf_counter.h>
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 12327b2..fbb87cf 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -653,6 +653,21 @@
 	  This is a relatively cheap check but if you care about maximum
 	  performance, say N.
 
+config DEBUG_CREDENTIALS
+	bool "Debug credential management"
+	depends on DEBUG_KERNEL
+	help
+	  Enable this to turn on some debug checking for credential
+	  management.  The additional code keeps track of the number of
+	  pointers from task_structs to any given cred struct, and checks to
+	  see that this number never exceeds the usage count of the cred
+	  struct.
+
+	  Furthermore, if SELinux is enabled, this also checks that the
+	  security pointer in the cred struct is never seen to be invalid.
+
+	  If unsure, say N.
+
 #
 # Select this config option from the architecture Kconfig, if it
 # it is preferred to always offer frame pointers as a config
diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c
index f1ed2fe..bd2bea9 100644
--- a/lib/is_single_threaded.c
+++ b/lib/is_single_threaded.c
@@ -12,34 +12,47 @@
 
 #include <linux/sched.h>
 
-/**
- * is_single_threaded - Determine if a thread group is single-threaded or not
- * @p: A task in the thread group in question
- *
- * This returns true if the thread group to which a task belongs is single
- * threaded, false if it is not.
+/*
+ * Returns true if the task does not share ->mm with another thread/process.
  */
-bool is_single_threaded(struct task_struct *p)
+bool current_is_single_threaded(void)
 {
-	struct task_struct *g, *t;
-	struct mm_struct *mm = p->mm;
+	struct task_struct *task = current;
+	struct mm_struct *mm = task->mm;
+	struct task_struct *p, *t;
+	bool ret;
 
-	if (atomic_read(&p->signal->count) != 1)
-		goto no;
+	if (atomic_read(&task->signal->live) != 1)
+		return false;
 
-	if (atomic_read(&p->mm->mm_users) != 1) {
-		read_lock(&tasklist_lock);
-		do_each_thread(g, t) {
-			if (t->mm == mm && t != p)
-				goto no_unlock;
-		} while_each_thread(g, t);
-		read_unlock(&tasklist_lock);
+	if (atomic_read(&mm->mm_users) == 1)
+		return true;
+
+	ret = false;
+	rcu_read_lock();
+	for_each_process(p) {
+		if (unlikely(p->flags & PF_KTHREAD))
+			continue;
+		if (unlikely(p == task->group_leader))
+			continue;
+
+		t = p;
+		do {
+			if (unlikely(t->mm == mm))
+				goto found;
+			if (likely(t->mm))
+				break;
+			/*
+			 * t->mm == NULL. Make sure next_thread/next_task
+			 * will see other CLONE_VM tasks which might be
+			 * forked before exiting.
+			 */
+			smp_rmb();
+		} while_each_thread(p, t);
 	}
+	ret = true;
+found:
+	rcu_read_unlock();
 
-	return true;
-
-no_unlock:
-	read_unlock(&tasklist_lock);
-no:
-	return false;
+	return ret;
 }
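
Callers that used to pass current to is_single_threaded() now call the new
helper directly; the answer is only stable for as long as the caller prevents
new threads from being created. A sketch:

	/* Sketch: refuse an operation that must not race with sibling threads. */
	if (!current_is_single_threaded())
		return -EAGAIN;
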
diff --git a/mm/Makefile b/mm/Makefile
index 5e0bd64..147a7a7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -8,7 +8,7 @@
 			   vmalloc.o
 
 obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
-			   maccess.o page_alloc.o page-writeback.o pdflush.o \
+			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
 			   page_isolation.o mm_init.o $(mmu-y)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c86edd2..d3ca0da 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1,8 +1,11 @@
 
 #include <linux/wait.h>
 #include <linux/backing-dev.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/writeback.h>
@@ -14,6 +17,7 @@
 EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
+	.name		= "default",
 	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
@@ -22,6 +26,18 @@
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 static struct class *bdi_class;
+DEFINE_SPINLOCK(bdi_lock);
+LIST_HEAD(bdi_list);
+LIST_HEAD(bdi_pending_list);
+
+static struct task_struct *sync_supers_tsk;
+static struct timer_list sync_supers_timer;
+
+static int bdi_sync_supers(void *);
+static void sync_supers_timer_fn(unsigned long);
+static void arm_supers_timer(void);
+
+static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
 
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -37,9 +53,29 @@
 static int bdi_debug_stats_show(struct seq_file *m, void *v)
 {
 	struct backing_dev_info *bdi = m->private;
+	struct bdi_writeback *wb;
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
+	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
+	struct inode *inode;
+
+	/*
+	 * The inode lock is enough here; the bdi->wb_list is protected by
+	 * RCU on the reader side.
+	 */
+	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
+	spin_lock(&inode_lock);
+	list_for_each_entry(wb, &bdi->wb_list, list) {
+		nr_wb++;
+		list_for_each_entry(inode, &wb->b_dirty, i_list)
+			nr_dirty++;
+		list_for_each_entry(inode, &wb->b_io, i_list)
+			nr_io++;
+		list_for_each_entry(inode, &wb->b_more_io, i_list)
+			nr_more_io++;
+	}
+	spin_unlock(&inode_lock);
 
 	get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
 
@@ -49,12 +85,22 @@
 		   "BdiReclaimable:   %8lu kB\n"
 		   "BdiDirtyThresh:   %8lu kB\n"
 		   "DirtyThresh:      %8lu kB\n"
-		   "BackgroundThresh: %8lu kB\n",
+		   "BackgroundThresh: %8lu kB\n"
+		   "WriteBack threads:%8lu\n"
+		   "b_dirty:          %8lu\n"
+		   "b_io:             %8lu\n"
+		   "b_more_io:        %8lu\n"
+		   "bdi_list:         %8u\n"
+		   "state:            %8lx\n"
+		   "wb_mask:          %8lx\n"
+		   "wb_list:          %8u\n"
+		   "wb_cnt:           %8u\n",
 		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
 		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
-		   K(bdi_thresh),
-		   K(dirty_thresh),
-		   K(background_thresh));
+		   K(bdi_thresh), K(dirty_thresh),
+		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
+		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
+		   !list_empty(&bdi->wb_list), bdi->wb_cnt);
 #undef K
 
 	return 0;
@@ -185,6 +231,13 @@
 {
 	int err;
 
+	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
+	BUG_ON(IS_ERR(sync_supers_tsk));
+
+	init_timer(&sync_supers_timer);
+	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
+	arm_supers_timer();
+
 	err = bdi_init(&default_backing_dev_info);
 	if (!err)
 		bdi_register(&default_backing_dev_info, NULL, "default");
@@ -193,6 +246,248 @@
 }
 subsys_initcall(default_bdi_init);
 
+static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
+{
+	memset(wb, 0, sizeof(*wb));
+
+	wb->bdi = bdi;
+	wb->last_old_flush = jiffies;
+	INIT_LIST_HEAD(&wb->b_dirty);
+	INIT_LIST_HEAD(&wb->b_io);
+	INIT_LIST_HEAD(&wb->b_more_io);
+}
+
+static void bdi_task_init(struct backing_dev_info *bdi,
+			  struct bdi_writeback *wb)
+{
+	struct task_struct *tsk = current;
+
+	spin_lock(&bdi->wb_lock);
+	list_add_tail_rcu(&wb->list, &bdi->wb_list);
+	spin_unlock(&bdi->wb_lock);
+
+	tsk->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	set_freezable();
+
+	/*
+	 * Our parent may run at a different priority, just set us to normal
+	 */
+	set_user_nice(tsk, 0);
+}
+
+static int bdi_start_fn(void *ptr)
+{
+	struct bdi_writeback *wb = ptr;
+	struct backing_dev_info *bdi = wb->bdi;
+	int ret;
+
+	/*
+	 * Add us to the active bdi_list
+	 */
+	spin_lock(&bdi_lock);
+	list_add(&bdi->bdi_list, &bdi_list);
+	spin_unlock(&bdi_lock);
+
+	bdi_task_init(bdi, wb);
+
+	/*
+	 * Clear the pending bit and wake up anybody waiting to tear us down
+	 */
+	clear_bit(BDI_pending, &bdi->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&bdi->state, BDI_pending);
+
+	ret = bdi_writeback_task(wb);
+
+	/*
+	 * Remove us from the list
+	 */
+	spin_lock(&bdi->wb_lock);
+	list_del_rcu(&wb->list);
+	spin_unlock(&bdi->wb_lock);
+
+	/*
+	 * Flush any work that raced with us exiting. No new work
+	 * will be added, since this bdi isn't discoverable anymore.
+	 */
+	if (!list_empty(&bdi->work_list))
+		wb_do_writeback(wb, 1);
+
+	wb->task = NULL;
+	return ret;
+}
+
+int bdi_has_dirty_io(struct backing_dev_info *bdi)
+{
+	return wb_has_dirty_io(&bdi->wb);
+}
+
+static void bdi_flush_io(struct backing_dev_info *bdi)
+{
+	struct writeback_control wbc = {
+		.bdi			= bdi,
+		.sync_mode		= WB_SYNC_NONE,
+		.older_than_this	= NULL,
+		.range_cyclic		= 1,
+		.nr_to_write		= 1024,
+	};
+
+	writeback_inodes_wbc(&wbc);
+}
+
+/*
+ * kupdated() used to do this. We cannot do it from the bdi_forker_task()
+ * or we risk deadlocking on ->s_umount. The longer term solution would be
+ * to implement sync_supers_bdi() or similar and simply do it from the
+ * bdi writeback tasks individually.
+ */
+static int bdi_sync_supers(void *unused)
+{
+	set_user_nice(current, 0);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+
+		/*
+		 * Do this periodically, like kupdated() did before.
+		 */
+		sync_supers();
+	}
+
+	return 0;
+}
+
+static void arm_supers_timer(void)
+{
+	unsigned long next;
+
+	next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
+	mod_timer(&sync_supers_timer, round_jiffies_up(next));
+}
+
+static void sync_supers_timer_fn(unsigned long unused)
+{
+	wake_up_process(sync_supers_tsk);
+	arm_supers_timer();
+}
+
+static int bdi_forker_task(void *ptr)
+{
+	struct bdi_writeback *me = ptr;
+
+	bdi_task_init(me->bdi, me);
+
+	for (;;) {
+		struct backing_dev_info *bdi, *tmp;
+		struct bdi_writeback *wb;
+
+		/*
+		 * Temporary measure, we want to make sure we don't see
+		 * dirty data on the default backing_dev_info
+		 */
+		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
+			wb_do_writeback(me, 0);
+
+		spin_lock(&bdi_lock);
+
+		/*
+		 * Check if any existing bdi's have dirty data without
+		 * a thread registered. If so, set that up.
+		 */
+		list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
+			if (bdi->wb.task)
+				continue;
+			if (list_empty(&bdi->work_list) &&
+			    !bdi_has_dirty_io(bdi))
+				continue;
+
+			bdi_add_default_flusher_task(bdi);
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (list_empty(&bdi_pending_list)) {
+			unsigned long wait;
+
+			spin_unlock(&bdi_lock);
+			wait = msecs_to_jiffies(dirty_writeback_interval * 10);
+			schedule_timeout(wait);
+			try_to_freeze();
+			continue;
+		}
+
+		__set_current_state(TASK_RUNNING);
+
+		/*
+		 * This is our real job - check for pending entries in
+		 * bdi_pending_list, and create the tasks that got added
+		 */
+		bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
+				 bdi_list);
+		list_del_init(&bdi->bdi_list);
+		spin_unlock(&bdi_lock);
+
+		wb = &bdi->wb;
+		wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
+					dev_name(bdi->dev));
+		/*
+		 * If task creation fails, then re-add the bdi to
+		 * the pending list and force writeout of the bdi
+		 * from this forker thread. That will free some memory
+		 * and we can try again.
+		 */
+		if (IS_ERR(wb->task)) {
+			wb->task = NULL;
+
+			/*
+			 * Add this 'bdi' to the back, so we get
+			 * a chance to flush other bdi's to free
+			 * memory.
+			 */
+			spin_lock(&bdi_lock);
+			list_add_tail(&bdi->bdi_list, &bdi_pending_list);
+			spin_unlock(&bdi_lock);
+
+			bdi_flush_io(bdi);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Add the default flusher task that gets created for any bdi
+ * that has dirty data pending writeout
+ */
+static void bdi_add_default_flusher_task(struct backing_dev_info *bdi)
+{
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
+	if (WARN_ON(!test_bit(BDI_registered, &bdi->state))) {
+		printk(KERN_ERR "bdi %p/%s is not registered!\n",
+							bdi, bdi->name);
+		return;
+	}
+
+	/*
+	 * Check with the helper whether to proceed with adding a task. It will
+	 * only abort if two or more simultaneous calls to
+	 * bdi_add_default_flusher_task() occur; further additions will block
+	 * waiting for previous additions to finish.
+	 */
+	if (!test_and_set_bit(BDI_pending, &bdi->state)) {
+		list_move_tail(&bdi->bdi_list, &bdi_pending_list);
+
+		/*
+		 * We are now on the pending list, wake up bdi_forker_task()
+		 * to finish the job and add us back to the active bdi_list
+		 */
+		wake_up_process(default_backing_dev_info.wb.task);
+	}
+}
+
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...)
 {
@@ -211,9 +506,35 @@
 		goto exit;
 	}
 
-	bdi->dev = dev;
-	bdi_debug_register(bdi, dev_name(dev));
+	spin_lock(&bdi_lock);
+	list_add_tail(&bdi->bdi_list, &bdi_list);
+	spin_unlock(&bdi_lock);
 
+	bdi->dev = dev;
+
+	/*
+	 * Just start the forker thread for our default backing_dev_info,
+	 * and add other bdi's to the list. They will get a thread created
+	 * on-demand when they need it.
+	 */
+	if (bdi_cap_flush_forker(bdi)) {
+		struct bdi_writeback *wb = &bdi->wb;
+
+		wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
+						dev_name(dev));
+		if (IS_ERR(wb->task)) {
+			wb->task = NULL;
+			ret = -ENOMEM;
+
+			spin_lock(&bdi_lock);
+			list_del(&bdi->bdi_list);
+			spin_unlock(&bdi_lock);
+			goto exit;
+		}
+	}
+
+	bdi_debug_register(bdi, dev_name(dev));
+	set_bit(BDI_registered, &bdi->state);
 exit:
 	return ret;
 }
@@ -225,9 +546,42 @@
 }
 EXPORT_SYMBOL(bdi_register_dev);
 
+/*
+ * Remove bdi from the global list and shutdown any threads we have running
+ */
+static void bdi_wb_shutdown(struct backing_dev_info *bdi)
+{
+	struct bdi_writeback *wb;
+
+	if (!bdi_cap_writeback_dirty(bdi))
+		return;
+
+	/*
+	 * If setup is pending, wait for that to complete first
+	 */
+	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
+			TASK_UNINTERRUPTIBLE);
+
+	/*
+	 * Make sure nobody finds us on the bdi_list anymore
+	 */
+	spin_lock(&bdi_lock);
+	list_del(&bdi->bdi_list);
+	spin_unlock(&bdi_lock);
+
+	/*
+	 * Finally, kill the kernel threads. We don't need to be RCU
+	 * safe anymore, since the bdi is gone from visibility.
+	 */
+	list_for_each_entry(wb, &bdi->wb_list, list)
+		kthread_stop(wb->task);
+}
+
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		if (!bdi_cap_flush_forker(bdi))
+			bdi_wb_shutdown(bdi);
 		bdi_debug_unregister(bdi);
 		device_unregister(bdi->dev);
 		bdi->dev = NULL;
@@ -237,14 +591,25 @@
 
 int bdi_init(struct backing_dev_info *bdi)
 {
-	int i;
-	int err;
+	int i, err;
 
 	bdi->dev = NULL;
 
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = PROP_FRAC_BASE;
+	spin_lock_init(&bdi->wb_lock);
+	INIT_LIST_HEAD(&bdi->bdi_list);
+	INIT_LIST_HEAD(&bdi->wb_list);
+	INIT_LIST_HEAD(&bdi->work_list);
+
+	bdi_wb_init(&bdi->wb, bdi);
+
+	/*
+	 * Only one flusher thread is supported for now; hard-code mask and count
+	 */
+	bdi->wb_mask = 1;
+	bdi->wb_cnt = 1;
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
@@ -269,6 +634,8 @@
 {
 	int i;
 
+	WARN_ON(bdi_has_dirty_io(bdi));
+
 	bdi_unregister(bdi);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
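
For drivers the per-bdi flusher threads are transparent: a bdi is initialised
and registered as before, and the forker thread spawns a "flush-<name>" task
on demand once dirty IO shows up. A condensed, hypothetical lifecycle:

	static struct backing_dev_info example_bdi = {
		.name		= "example",	/* the new ->name field */
		.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
	};

	err = bdi_init(&example_bdi);
	if (!err)
		err = bdi_register(&example_bdi, NULL, "example");
	/* ... on teardown: */
	bdi_destroy(&example_bdi);	/* WARNs if dirty IO remains, then unregisters */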
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 701740c..555d5d2 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -521,7 +521,11 @@
 		region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
 				start_off);
 		memset(region, 0, size);
-		kmemleak_alloc(region, size, 1, 0);
+		/*
+		 * The min_count is set to 0 so that bootmem allocated blocks
+		 * are never reported as leaks.
+		 */
+		kmemleak_alloc(region, size, 0, 0);
 		return region;
 	}
 
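
The one-character change above flips the semantics of the block for
kmemleak: min_count is what decides whether a block can ever be reported.
A sketch of the colour rules, matching the color_white()/color_gray()
predicates added later in this diff (color_of() itself is a hypothetical
illustration, not a kernel function, and the uncoloured count == -1 state
of brand-new objects is omitted):

	/*
	 * min_count == -1 -> black: never scanned, never reported
	 * min_count ==  0 -> gray:  scanned, but never reported as a leak
	 * min_count  >  0 -> leak candidate when too few references found
	 */
	static const char *color_of(int count, int min_count)
	{
		if (min_count == -1)
			return "black";
		if (count >= min_count)
			return "gray";		/* includes min_count == 0 */
		return "white";			/* reported as unreferenced */
	}
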
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 4872673..4ea4510 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -92,11 +92,13 @@
 #include <linux/string.h>
 #include <linux/nodemask.h>
 #include <linux/mm.h>
+#include <linux/workqueue.h>
 
 #include <asm/sections.h>
 #include <asm/processor.h>
 #include <asm/atomic.h>
 
+#include <linux/kmemcheck.h>
 #include <linux/kmemleak.h>
 
 /*
@@ -107,6 +109,7 @@
 #define SECS_FIRST_SCAN		60	/* delay before the first scan */
 #define SECS_SCAN_WAIT		600	/* subsequent auto scanning delay */
 #define GRAY_LIST_PASSES	25	/* maximum number of gray list scans */
+#define MAX_SCAN_SIZE		4096	/* maximum size of a scanned block */
 
 #define BYTES_PER_POINTER	sizeof(void *)
 
@@ -120,6 +123,9 @@
 	size_t length;
 };
 
+#define KMEMLEAK_GREY	0
+#define KMEMLEAK_BLACK	-1
+
 /*
  * Structure holding the metadata for each allocated memory block.
  * Modifications to such objects should be made while holding the
@@ -161,6 +167,15 @@
 /* flag set on newly allocated objects */
 #define OBJECT_NEW		(1 << 3)
 
+/* number of bytes to print per line; must be 16 or 32 */
+#define HEX_ROW_SIZE		16
+/* number of bytes to print at a time (1, 2, 4, 8) */
+#define HEX_GROUP_SIZE		1
+/* include ASCII after the hex output */
+#define HEX_ASCII		1
+/* max number of lines to be printed */
+#define HEX_MAX_LINES		2
+
 /* the list of all allocated objects */
 static LIST_HEAD(object_list);
 /* the list of gray-colored objects (see color_gray comment below) */
@@ -228,11 +243,14 @@
 	int min_count;			/* minimum reference count */
 	unsigned long offset;		/* scan area offset */
 	size_t length;			/* scan area length */
+	unsigned long trace[MAX_TRACE];	/* stack trace */
+	unsigned int trace_len;		/* stack trace length */
 };
 
 /* early logging buffer and current position */
-static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE];
-static int crt_early_log;
+static struct early_log
+	early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
+static int crt_early_log __initdata;
 
 static void kmemleak_disable(void);
 
@@ -255,6 +273,35 @@
 } while (0)
 
 /*
+ * Print the object's hex dump to the seq file. The number of lines to be
+ * printed is limited to HEX_MAX_LINES to prevent seq file spamming. The
+ * actual number of printed bytes depends on HEX_ROW_SIZE. It must be called
+ * with the object->lock held.
+ */
+static void hex_dump_object(struct seq_file *seq,
+			    struct kmemleak_object *object)
+{
+	const u8 *ptr = (const u8 *)object->pointer;
+	int i, len, remaining;
+	unsigned char linebuf[HEX_ROW_SIZE * 5];
+
+	/* limit the number of lines to HEX_MAX_LINES */
+	remaining = len =
+		min(object->size, (size_t)(HEX_MAX_LINES * HEX_ROW_SIZE));
+
+	seq_printf(seq, "  hex dump (first %d bytes):\n", len);
+	for (i = 0; i < len; i += HEX_ROW_SIZE) {
+		int linelen = min(remaining, HEX_ROW_SIZE);
+
+		remaining -= HEX_ROW_SIZE;
+		hex_dump_to_buffer(ptr + i, linelen, HEX_ROW_SIZE,
+				   HEX_GROUP_SIZE, linebuf, sizeof(linebuf),
+				   HEX_ASCII);
+		seq_printf(seq, "    %s\n", linebuf);
+	}
+}
+
+/*
  * Object colors, encoded with count and min_count:
  * - white - orphan object, not enough references to it (count < min_count)
  * - gray  - not orphan, not marked as false positive (min_count == 0) or
@@ -264,19 +311,21 @@
  * Newly created objects don't have any color assigned (object->count == -1)
  * before the next memory scan when they become white.
  */
-static int color_white(const struct kmemleak_object *object)
+static bool color_white(const struct kmemleak_object *object)
 {
-	return object->count != -1 && object->count < object->min_count;
+	return object->count != KMEMLEAK_BLACK &&
+		object->count < object->min_count;
 }
 
-static int color_gray(const struct kmemleak_object *object)
+static bool color_gray(const struct kmemleak_object *object)
 {
-	return object->min_count != -1 && object->count >= object->min_count;
+	return object->min_count != KMEMLEAK_BLACK &&
+		object->count >= object->min_count;
 }
 
-static int color_black(const struct kmemleak_object *object)
+static bool color_black(const struct kmemleak_object *object)
 {
-	return object->min_count == -1;
+	return object->min_count == KMEMLEAK_BLACK;
 }
 
 /*
@@ -284,7 +333,7 @@
  * not be deleted and have a minimum age to avoid false positives caused by
  * pointers temporarily stored in CPU registers.
  */
-static int unreferenced_object(struct kmemleak_object *object)
+static bool unreferenced_object(struct kmemleak_object *object)
 {
 	return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
 		time_before_eq(object->jiffies + jiffies_min_age,
@@ -304,6 +353,7 @@
 		   object->pointer, object->size);
 	seq_printf(seq, "  comm \"%s\", pid %d, jiffies %lu\n",
 		   object->comm, object->pid, object->jiffies);
+	hex_dump_object(seq, object);
 	seq_printf(seq, "  backtrace:\n");
 
 	for (i = 0; i < object->trace_len; i++) {
@@ -330,6 +380,7 @@
 		  object->comm, object->pid, object->jiffies);
 	pr_notice("  min_count = %d\n", object->min_count);
 	pr_notice("  count = %d\n", object->count);
+	pr_notice("  flags = 0x%lx\n", object->flags);
 	pr_notice("  backtrace:\n");
 	print_stack_trace(&trace, 4);
 }
@@ -434,21 +485,36 @@
 }
 
 /*
+ * Save stack trace to the given array of MAX_TRACE size.
+ */
+static int __save_stack_trace(unsigned long *trace)
+{
+	struct stack_trace stack_trace;
+
+	stack_trace.max_entries = MAX_TRACE;
+	stack_trace.nr_entries = 0;
+	stack_trace.entries = trace;
+	stack_trace.skip = 2;
+	save_stack_trace(&stack_trace);
+
+	return stack_trace.nr_entries;
+}
+
+/*
  * Create the metadata (struct kmemleak_object) corresponding to an allocated
  * memory block and add it to the object_list and object_tree_root.
  */
-static void create_object(unsigned long ptr, size_t size, int min_count,
-			  gfp_t gfp)
+static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
+					     int min_count, gfp_t gfp)
 {
 	unsigned long flags;
 	struct kmemleak_object *object;
 	struct prio_tree_node *node;
-	struct stack_trace trace;
 
 	object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
 	if (!object) {
 		kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
-		return;
+		return NULL;
 	}
 
 	INIT_LIST_HEAD(&object->object_list);
@@ -482,18 +548,14 @@
 	}
 
 	/* kernel backtrace */
-	trace.max_entries = MAX_TRACE;
-	trace.nr_entries = 0;
-	trace.entries = object->trace;
-	trace.skip = 1;
-	save_stack_trace(&trace);
-	object->trace_len = trace.nr_entries;
+	object->trace_len = __save_stack_trace(object->trace);
 
 	INIT_PRIO_TREE_NODE(&object->tree_node);
 	object->tree_node.start = ptr;
 	object->tree_node.last = ptr + size - 1;
 
 	write_lock_irqsave(&kmemleak_lock, flags);
+
 	min_addr = min(min_addr, ptr);
 	max_addr = max(max_addr, ptr + size);
 	node = prio_tree_insert(&object_tree_root, &object->tree_node);
@@ -504,20 +566,19 @@
 	 * random memory blocks.
 	 */
 	if (node != &object->tree_node) {
-		unsigned long flags;
-
 		kmemleak_stop("Cannot insert 0x%lx into the object search tree "
 			      "(already existing)\n", ptr);
 		object = lookup_object(ptr, 1);
-		spin_lock_irqsave(&object->lock, flags);
+		spin_lock(&object->lock);
 		dump_object_info(object);
-		spin_unlock_irqrestore(&object->lock, flags);
+		spin_unlock(&object->lock);
 
 		goto out;
 	}
 	list_add_tail_rcu(&object->object_list, &object_list);
 out:
 	write_unlock_irqrestore(&kmemleak_lock, flags);
+	return object;
 }
 
 /*
@@ -604,25 +665,46 @@
 
 	put_object(object);
 }
+
+static void __paint_it(struct kmemleak_object *object, int color)
+{
+	object->min_count = color;
+	if (color == KMEMLEAK_BLACK)
+		object->flags |= OBJECT_NO_SCAN;
+}
+
+static void paint_it(struct kmemleak_object *object, int color)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&object->lock, flags);
+	__paint_it(object, color);
+	spin_unlock_irqrestore(&object->lock, flags);
+}
+
+static void paint_ptr(unsigned long ptr, int color)
+{
+	struct kmemleak_object *object;
+
+	object = find_and_get_object(ptr, 0);
+	if (!object) {
+		kmemleak_warn("Trying to color unknown object "
+			      "at 0x%08lx as %s\n", ptr,
+			      (color == KMEMLEAK_GREY) ? "Grey" :
+			      (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
+		return;
+	}
+	paint_it(object, color);
+	put_object(object);
+}
+
 /*
 * Mark an object as permanently gray-colored so that it can no longer be
  * reported as a leak. This is used in general to mark a false positive.
  */
 static void make_gray_object(unsigned long ptr)
 {
-	unsigned long flags;
-	struct kmemleak_object *object;
-
-	object = find_and_get_object(ptr, 0);
-	if (!object) {
-		kmemleak_warn("Graying unknown object at 0x%08lx\n", ptr);
-		return;
-	}
-
-	spin_lock_irqsave(&object->lock, flags);
-	object->min_count = 0;
-	spin_unlock_irqrestore(&object->lock, flags);
-	put_object(object);
+	paint_ptr(ptr, KMEMLEAK_GREY);
 }
 
 /*
@@ -631,19 +713,7 @@
  */
 static void make_black_object(unsigned long ptr)
 {
-	unsigned long flags;
-	struct kmemleak_object *object;
-
-	object = find_and_get_object(ptr, 0);
-	if (!object) {
-		kmemleak_warn("Blacking unknown object at 0x%08lx\n", ptr);
-		return;
-	}
-
-	spin_lock_irqsave(&object->lock, flags);
-	object->min_count = -1;
-	spin_unlock_irqrestore(&object->lock, flags);
-	put_object(object);
+	paint_ptr(ptr, KMEMLEAK_BLACK);
 }
 
 /*
@@ -715,14 +785,15 @@
  * Log an early kmemleak_* call to the early_log buffer. These calls will be
  * processed later once kmemleak is fully initialized.
  */
-static void log_early(int op_type, const void *ptr, size_t size,
-		      int min_count, unsigned long offset, size_t length)
+static void __init log_early(int op_type, const void *ptr, size_t size,
+			     int min_count, unsigned long offset, size_t length)
 {
 	unsigned long flags;
 	struct early_log *log;
 
 	if (crt_early_log >= ARRAY_SIZE(early_log)) {
-		pr_warning("Early log buffer exceeded\n");
+		pr_warning("Early log buffer exceeded, "
+			   "please increase DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n");
 		kmemleak_disable();
 		return;
 	}
@@ -739,16 +810,45 @@
 	log->min_count = min_count;
 	log->offset = offset;
 	log->length = length;
+	if (op_type == KMEMLEAK_ALLOC)
+		log->trace_len = __save_stack_trace(log->trace);
 	crt_early_log++;
 	local_irq_restore(flags);
 }
 
 /*
+ * Replay an early logged allocation and restore its stack trace.
+ */
+static void early_alloc(struct early_log *log)
+{
+	struct kmemleak_object *object;
+	unsigned long flags;
+	int i;
+
+	if (!atomic_read(&kmemleak_enabled) || !log->ptr || IS_ERR(log->ptr))
+		return;
+
+	/*
+	 * RCU locking needed to ensure object is not freed via put_object().
+	 */
+	rcu_read_lock();
+	object = create_object((unsigned long)log->ptr, log->size,
+			       log->min_count, GFP_KERNEL);
+	/* create_object() returns NULL (and disables kmemleak) on failure */
+	if (!object) {
+		rcu_read_unlock();
+		return;
+	}
+	spin_lock_irqsave(&object->lock, flags);
+	for (i = 0; i < log->trace_len; i++)
+		object->trace[i] = log->trace[i];
+	object->trace_len = log->trace_len;
+	spin_unlock_irqrestore(&object->lock, flags);
+	rcu_read_unlock();
+}
+
+/*
  * Memory allocation function callback. This function is called from the
  * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc,
  * vmalloc etc.).
  */
-void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp)
+void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
+			  gfp_t gfp)
 {
 	pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count);
 
@@ -763,7 +863,7 @@
  * Memory freeing function callback. This function is called from the kernel
  * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.).
  */
-void kmemleak_free(const void *ptr)
+void __ref kmemleak_free(const void *ptr)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -778,7 +878,7 @@
  * Partial memory freeing function callback. This function is usually called
  * from bootmem allocator when (part of) a memory block is freed.
  */
-void kmemleak_free_part(const void *ptr, size_t size)
+void __ref kmemleak_free_part(const void *ptr, size_t size)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -793,7 +893,7 @@
  * Mark an already allocated memory block as a false positive. This will cause
  * the block to no longer be reported as leak and always be scanned.
  */
-void kmemleak_not_leak(const void *ptr)
+void __ref kmemleak_not_leak(const void *ptr)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -809,7 +909,7 @@
  * corresponding block is not a leak and does not contain any references to
  * other allocated memory blocks.
  */
-void kmemleak_ignore(const void *ptr)
+void __ref kmemleak_ignore(const void *ptr)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -823,8 +923,8 @@
 /*
  * Limit the range to be scanned in an allocated memory block.
  */
-void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length,
-			gfp_t gfp)
+void __ref kmemleak_scan_area(const void *ptr, unsigned long offset,
+			      size_t length, gfp_t gfp)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -838,7 +938,7 @@
 /*
  * Inform kmemleak not to scan the given memory block.
  */
-void kmemleak_no_scan(const void *ptr)
+void __ref kmemleak_no_scan(const void *ptr)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -882,15 +982,22 @@
 	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
 
 	for (ptr = start; ptr < end; ptr++) {
-		unsigned long flags;
-		unsigned long pointer = *ptr;
 		struct kmemleak_object *object;
+		unsigned long flags;
+		unsigned long pointer;
 
 		if (allow_resched)
 			cond_resched();
 		if (scan_should_stop())
 			break;
 
+		/* don't scan uninitialized memory */
+		if (!kmemcheck_is_obj_initialized((unsigned long)ptr,
+						  BYTES_PER_POINTER))
+			continue;
+
+		pointer = *ptr;
+
 		object = find_and_get_object(pointer, 1);
 		if (!object)
 			continue;
@@ -949,10 +1056,21 @@
 	if (!(object->flags & OBJECT_ALLOCATED))
 		/* already freed object */
 		goto out;
-	if (hlist_empty(&object->area_list))
-		scan_block((void *)object->pointer,
-			   (void *)(object->pointer + object->size), object, 0);
-	else
+	if (hlist_empty(&object->area_list)) {
+		void *start = (void *)object->pointer;
+		void *end = (void *)(object->pointer + object->size);
+
+		while (start < end && (object->flags & OBJECT_ALLOCATED) &&
+		       !(object->flags & OBJECT_NO_SCAN)) {
+			scan_block(start, min(start + MAX_SCAN_SIZE, end),
+				   object, 0);
+			start += MAX_SCAN_SIZE;
+
+			spin_unlock_irqrestore(&object->lock, flags);
+			cond_resched();
+			spin_lock_irqsave(&object->lock, flags);
+		}
+	} else
 		hlist_for_each_entry(area, elem, &object->area_list, node)
 			scan_block((void *)(object->pointer + area->offset),
 				   (void *)(object->pointer + area->offset
@@ -970,7 +1088,6 @@
 {
 	unsigned long flags;
 	struct kmemleak_object *object, *tmp;
-	struct task_struct *task;
 	int i;
 	int new_leaks = 0;
 	int gray_list_pass = 0;
@@ -1037,15 +1154,16 @@
 	}
 
 	/*
-	 * Scanning the task stacks may introduce false negatives and it is
-	 * not enabled by default.
+	 * Scanning the task stacks (may introduce false negatives).
 	 */
 	if (kmemleak_stack_scan) {
+		struct task_struct *p, *g;
+
 		read_lock(&tasklist_lock);
-		for_each_process(task)
-			scan_block(task_stack_page(task),
-				   task_stack_page(task) + THREAD_SIZE,
-				   NULL, 0);
+		do_each_thread(g, p) {
+			scan_block(task_stack_page(p), task_stack_page(p) +
+				   THREAD_SIZE, NULL, 0);
+		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	}
 
@@ -1170,7 +1288,7 @@
  * Start the automatic memory scanning thread. This function must be called
  * with the scan_mutex held.
  */
-void start_scan_thread(void)
+static void start_scan_thread(void)
 {
 	if (scan_thread)
 		return;
@@ -1185,7 +1303,7 @@
  * Stop the automatic memory scanning thread. This function must be called
  * with the scan_mutex held.
  */
-void stop_scan_thread(void)
+static void stop_scan_thread(void)
 {
 	if (scan_thread) {
 		kthread_stop(scan_thread);
@@ -1294,6 +1412,49 @@
 	return seq_release(inode, file);
 }
 
+static int dump_str_object_info(const char *str)
+{
+	unsigned long flags;
+	struct kmemleak_object *object;
+	unsigned long addr;
+
+	addr = simple_strtoul(str, NULL, 0);
+	object = find_and_get_object(addr, 0);
+	if (!object) {
+		pr_info("Unknown object at 0x%08lx\n", addr);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	dump_object_info(object);
+	spin_unlock_irqrestore(&object->lock, flags);
+
+	put_object(object);
+	return 0;
+}
+
+/*
+ * We use grey instead of black to ensure we can do future scans on the same
+ * objects. If we did not do future scans, these black objects could
+ * potentially contain references to newly allocated objects in the future and
+ * we'd end up with false positives.
+ */
+static void kmemleak_clear(void)
+{
+	struct kmemleak_object *object;
+	unsigned long flags;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(object, &object_list, object_list) {
+		spin_lock_irqsave(&object->lock, flags);
+		if ((object->flags & OBJECT_REPORTED) &&
+		    unreferenced_object(object))
+			__paint_it(object, KMEMLEAK_GREY);
+		spin_unlock_irqrestore(&object->lock, flags);
+	}
+	rcu_read_unlock();
+}
+
 /*
  * File write operation to configure kmemleak at run-time. The following
  * commands can be written to the /sys/kernel/debug/kmemleak file:
@@ -1305,6 +1466,9 @@
  *   scan=...	- set the automatic memory scanning period in seconds (0 to
  *		  disable it)
  *   scan	- trigger a memory scan
+ *   clear	- mark all current reported unreferenced kmemleak objects as
+ *		  grey to ignore printing them
+ *   dump=...	- dump information about the object found at the given address
  */
 static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
 			      size_t size, loff_t *ppos)
@@ -1345,6 +1509,10 @@
 		}
 	} else if (strncmp(buf, "scan", 4) == 0)
 		kmemleak_scan();
+	else if (strncmp(buf, "clear", 5) == 0)
+		kmemleak_clear();
+	else if (strncmp(buf, "dump=", 5) == 0)
+		ret = dump_str_object_info(buf + 5);
 	else
 		ret = -EINVAL;
 
@@ -1371,7 +1539,7 @@
  * Perform the freeing of the kmemleak internal objects after waiting for any
  * current memory scan to complete.
  */
-static int kmemleak_cleanup_thread(void *arg)
+static void kmemleak_do_cleanup(struct work_struct *work)
 {
 	struct kmemleak_object *object;
 
@@ -1383,22 +1551,9 @@
 		delete_object_full(object->pointer);
 	rcu_read_unlock();
 	mutex_unlock(&scan_mutex);
-
-	return 0;
 }
 
-/*
- * Start the clean-up thread.
- */
-static void kmemleak_cleanup(void)
-{
-	struct task_struct *cleanup_thread;
-
-	cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
-				     "kmemleak-clean");
-	if (IS_ERR(cleanup_thread))
-		pr_warning("Failed to create the clean-up thread\n");
-}
+static DECLARE_WORK(cleanup_work, kmemleak_do_cleanup);
 
 /*
  * Disable kmemleak. No memory allocation/freeing will be traced once this
@@ -1416,7 +1571,7 @@
 
 	/* check whether it is too early for a kernel thread */
 	if (atomic_read(&kmemleak_initialized))
-		kmemleak_cleanup();
+		schedule_work(&cleanup_work);
 
 	pr_info("Kernel memory leak detector disabled\n");
 }
@@ -1469,8 +1624,7 @@
 
 		switch (log->op_type) {
 		case KMEMLEAK_ALLOC:
-			kmemleak_alloc(log->ptr, log->size, log->min_count,
-				       GFP_KERNEL);
+			early_alloc(log);
 			break;
 		case KMEMLEAK_FREE:
 			kmemleak_free(log->ptr);
@@ -1513,7 +1667,7 @@
 		 * after setting kmemleak_initialized and we may end up with
 		 * two clean-up threads but serialized by scan_mutex.
 		 */
-		kmemleak_cleanup();
+		schedule_work(&cleanup_work);
 		return -ENOMEM;
 	}
 
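
Of the kmemleak changes above, the MAX_SCAN_SIZE loop added to
scan_object() is the most reusable idea: large blocks are scanned in
bounded chunks and the object lock is dropped between chunks, so a
multi-megabyte object can no longer monopolise a CPU for the whole scan.
A self-contained sketch of the pattern, with hypothetical demo_* names:

	#include <linux/kernel.h>
	#include <linux/sched.h>
	#include <linux/spinlock.h>

	#define DEMO_CHUNK	4096		/* mirrors MAX_SCAN_SIZE */

	static DEFINE_SPINLOCK(demo_lock);

	static void demo_scan_range(void *start, void *end)
	{
		/* stand-in for the real pointer scan */
	}

	static void demo_scan_chunked(void *start, void *end)
	{
		unsigned long flags;

		spin_lock_irqsave(&demo_lock, flags);
		while (start < end) {
			demo_scan_range(start, min(start + DEMO_CHUNK, end));
			start += DEMO_CHUNK;

			/* let other lockers and the scheduler in */
			spin_unlock_irqrestore(&demo_lock, flags);
			cond_resched();
			spin_lock_irqsave(&demo_lock, flags);
		}
		spin_unlock_irqrestore(&demo_lock, flags);
	}
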
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 81627eb..25e7770 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -36,15 +36,6 @@
 #include <linux/pagevec.h>
 
 /*
- * The maximum number of pages to writeout in a single bdflush/kupdate
- * operation.  We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode.  Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES	1024
-
-/*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
  * will look to see if it needs to force writeback or throttling.
  */
@@ -117,8 +108,6 @@
 /* End of sysctl-exported parameters */
 
 
-static void background_writeout(unsigned long _min_pages);
-
 /*
  * Scale the writeback cache size proportional to the relative writeout speeds.
  *
@@ -320,15 +309,13 @@
 /*
  *
  */
-static DEFINE_SPINLOCK(bdi_lock);
 static unsigned int bdi_min_ratio;
 
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	int ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	spin_lock(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
 	} else {
@@ -340,27 +327,26 @@
 			ret = -EINVAL;
 		}
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	spin_unlock(&bdi_lock);
 
 	return ret;
 }
 
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 {
-	unsigned long flags;
 	int ret = 0;
 
 	if (max_ratio > 100)
 		return -EINVAL;
 
-	spin_lock_irqsave(&bdi_lock, flags);
+	spin_lock(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
 	} else {
 		bdi->max_ratio = max_ratio;
 		bdi->max_prop_frac = (PROP_FRAC_BASE * max_ratio) / 100;
 	}
-	spin_unlock_irqrestore(&bdi_lock, flags);
+	spin_unlock(&bdi_lock);
 
 	return ret;
 }
@@ -546,7 +532,7 @@
 		 * up.
 		 */
 		if (bdi_nr_reclaimable > bdi_thresh) {
-			writeback_inodes(&wbc);
+			writeback_inodes_wbc(&wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
@@ -575,7 +561,7 @@
 		if (pages_written >= write_chunk)
 			break;		/* We've done our duty */
 
-		congestion_wait(BLK_RW_ASYNC, HZ/10);
+		schedule_timeout(1);
 	}
 
 	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
@@ -594,10 +580,18 @@
 	 * background_thresh, to keep the amount of dirty memory low.
 	 */
 	if ((laptop_mode && pages_written) ||
-			(!laptop_mode && (global_page_state(NR_FILE_DIRTY)
-					  + global_page_state(NR_UNSTABLE_NFS)
-					  > background_thresh)))
-		pdflush_operation(background_writeout, 0);
+	    (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY)
+					  + global_page_state(NR_UNSTABLE_NFS))
+					  > background_thresh))) {
+		struct writeback_control wbc = {
+			.bdi		= bdi,
+			.sync_mode	= WB_SYNC_NONE,
+			.nr_to_write	= nr_writeback,
+		};
+
+
+		bdi_start_writeback(&wbc);
+	}
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -681,153 +675,35 @@
         }
 }
 
-/*
- * writeback at least _min_pages, and keep writing until the amount of dirty
- * memory is less than the background threshold, or until we're all clean.
- */
-static void background_writeout(unsigned long _min_pages)
-{
-	long min_pages = _min_pages;
-	struct writeback_control wbc = {
-		.bdi		= NULL,
-		.sync_mode	= WB_SYNC_NONE,
-		.older_than_this = NULL,
-		.nr_to_write	= 0,
-		.nonblocking	= 1,
-		.range_cyclic	= 1,
-	};
-
-	for ( ; ; ) {
-		unsigned long background_thresh;
-		unsigned long dirty_thresh;
-
-		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
-		if (global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) < background_thresh
-				&& min_pages <= 0)
-			break;
-		wbc.more_io = 0;
-		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-		wbc.pages_skipped = 0;
-		writeback_inodes(&wbc);
-		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-			/* Wrote less than expected */
-			if (wbc.encountered_congestion || wbc.more_io)
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-			else
-				break;
-		}
-	}
-}
-
-/*
- * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
- * the whole world.  Returns 0 if a pdflush thread was dispatched.  Returns
- * -1 if all pdflush threads were busy.
- */
-int wakeup_pdflush(long nr_pages)
-{
-	if (nr_pages == 0)
-		nr_pages = global_page_state(NR_FILE_DIRTY) +
-				global_page_state(NR_UNSTABLE_NFS);
-	return pdflush_operation(background_writeout, nr_pages);
-}
-
-static void wb_timer_fn(unsigned long unused);
 static void laptop_timer_fn(unsigned long unused);
 
-static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
 static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
 
 /*
- * Periodic writeback of "old" data.
- *
- * Define "old": the first time one of an inode's pages is dirtied, we mark the
- * dirtying-time in the inode's address_space.  So this periodic writeback code
- * just walks the superblock inode list, writing back any inodes which are
- * older than a specific point in time.
- *
- * Try to run once per dirty_writeback_interval.  But if a writeback event
- * takes longer than a dirty_writeback_interval interval, then leave a
- * one-second gap.
- *
- * older_than_this takes precedence over nr_to_write.  So we'll only write back
- * all dirty pages if they are all attached to "old" mappings.
- */
-static void wb_kupdate(unsigned long arg)
-{
-	unsigned long oldest_jif;
-	unsigned long start_jif;
-	unsigned long next_jif;
-	long nr_to_write;
-	struct writeback_control wbc = {
-		.bdi		= NULL,
-		.sync_mode	= WB_SYNC_NONE,
-		.older_than_this = &oldest_jif,
-		.nr_to_write	= 0,
-		.nonblocking	= 1,
-		.for_kupdate	= 1,
-		.range_cyclic	= 1,
-	};
-
-	sync_supers();
-
-	oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10);
-	start_jif = jiffies;
-	next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10);
-	nr_to_write = global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
-	while (nr_to_write > 0) {
-		wbc.more_io = 0;
-		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
-		writeback_inodes(&wbc);
-		if (wbc.nr_to_write > 0) {
-			if (wbc.encountered_congestion || wbc.more_io)
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-			else
-				break;	/* All the old data is written */
-		}
-		nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-	}
-	if (time_before(next_jif, jiffies + HZ))
-		next_jif = jiffies + HZ;
-	if (dirty_writeback_interval)
-		mod_timer(&wb_timer, next_jif);
-}
-
-/*
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
 	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
 {
 	proc_dointvec(table, write, file, buffer, length, ppos);
-	if (dirty_writeback_interval)
-		mod_timer(&wb_timer, jiffies +
-			msecs_to_jiffies(dirty_writeback_interval * 10));
-	else
-		del_timer(&wb_timer);
 	return 0;
 }
 
-static void wb_timer_fn(unsigned long unused)
+static void do_laptop_sync(struct work_struct *work)
 {
-	if (pdflush_operation(wb_kupdate, 0) < 0)
-		mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
-}
-
-static void laptop_flush(unsigned long unused)
-{
-	sys_sync();
+	wakeup_flusher_threads(0);
+	kfree(work);
 }
 
 static void laptop_timer_fn(unsigned long unused)
 {
-	pdflush_operation(laptop_flush, 0);
+	struct work_struct *work;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (work) {
+		INIT_WORK(work, do_laptop_sync);
+		schedule_work(work);
+	}
 }
 
 /*
@@ -910,8 +786,6 @@
 {
 	int shift;
 
-	mod_timer(&wb_timer,
-		  jiffies + msecs_to_jiffies(dirty_writeback_interval * 10));
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 
diff --git a/mm/pdflush.c b/mm/pdflush.c
deleted file mode 100644
index 235ac44..0000000
--- a/mm/pdflush.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * mm/pdflush.c - worker threads for writing back filesystem data
- *
- * Copyright (C) 2002, Linus Torvalds.
- *
- * 09Apr2002	Andrew Morton
- *		Initial version
- * 29Feb2004	kaos@sgi.com
- *		Move worker thread creation to kthread to avoid chewing
- *		up stack space with nested calls to kernel_thread.
- */
-
-#include <linux/sched.h>
-#include <linux/list.h>
-#include <linux/signal.h>
-#include <linux/spinlock.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/fs.h>		/* Needed by writeback.h	  */
-#include <linux/writeback.h>	/* Prototypes pdflush_operation() */
-#include <linux/kthread.h>
-#include <linux/cpuset.h>
-#include <linux/freezer.h>
-
-
-/*
- * Minimum and maximum number of pdflush instances
- */
-#define MIN_PDFLUSH_THREADS	2
-#define MAX_PDFLUSH_THREADS	8
-
-static void start_one_pdflush_thread(void);
-
-
-/*
- * The pdflush threads are worker threads for writing back dirty data.
- * Ideally, we'd like one thread per active disk spindle.  But the disk
- * topology is very hard to divine at this level.   Instead, we take
- * care in various places to prevent more than one pdflush thread from
- * performing writeback against a single filesystem.  pdflush threads
- * have the PF_FLUSHER flag set in current->flags to aid in this.
- */
-
-/*
- * All the pdflush threads.  Protected by pdflush_lock
- */
-static LIST_HEAD(pdflush_list);
-static DEFINE_SPINLOCK(pdflush_lock);
-
-/*
- * The count of currently-running pdflush threads.  Protected
- * by pdflush_lock.
- *
- * Readable by sysctl, but not writable.  Published to userspace at
- * /proc/sys/vm/nr_pdflush_threads.
- */
-int nr_pdflush_threads = 0;
-
-/*
- * The time at which the pdflush thread pool last went empty
- */
-static unsigned long last_empty_jifs;
-
-/*
- * The pdflush thread.
- *
- * Thread pool management algorithm:
- * 
- * - The minimum and maximum number of pdflush instances are bound
- *   by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS.
- * 
- * - If there have been no idle pdflush instances for 1 second, create
- *   a new one.
- * 
- * - If the least-recently-went-to-sleep pdflush thread has been asleep
- *   for more than one second, terminate a thread.
- */
-
-/*
- * A structure for passing work to a pdflush thread.  Also for passing
- * state information between pdflush threads.  Protected by pdflush_lock.
- */
-struct pdflush_work {
-	struct task_struct *who;	/* The thread */
-	void (*fn)(unsigned long);	/* A callback function */
-	unsigned long arg0;		/* An argument to the callback */
-	struct list_head list;		/* On pdflush_list, when idle */
-	unsigned long when_i_went_to_sleep;
-};
-
-static int __pdflush(struct pdflush_work *my_work)
-{
-	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
-	set_freezable();
-	my_work->fn = NULL;
-	my_work->who = current;
-	INIT_LIST_HEAD(&my_work->list);
-
-	spin_lock_irq(&pdflush_lock);
-	for ( ; ; ) {
-		struct pdflush_work *pdf;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		list_move(&my_work->list, &pdflush_list);
-		my_work->when_i_went_to_sleep = jiffies;
-		spin_unlock_irq(&pdflush_lock);
-		schedule();
-		try_to_freeze();
-		spin_lock_irq(&pdflush_lock);
-		if (!list_empty(&my_work->list)) {
-			/*
-			 * Someone woke us up, but without removing our control
-			 * structure from the global list.  swsusp will do this
-			 * in try_to_freeze()->refrigerator().  Handle it.
-			 */
-			my_work->fn = NULL;
-			continue;
-		}
-		if (my_work->fn == NULL) {
-			printk("pdflush: bogus wakeup\n");
-			continue;
-		}
-		spin_unlock_irq(&pdflush_lock);
-
-		(*my_work->fn)(my_work->arg0);
-
-		spin_lock_irq(&pdflush_lock);
-
-		/*
-		 * Thread creation: For how long have there been zero
-		 * available threads?
-		 *
-		 * To throttle creation, we reset last_empty_jifs.
-		 */
-		if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
-			if (list_empty(&pdflush_list)) {
-				if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) {
-					last_empty_jifs = jiffies;
-					nr_pdflush_threads++;
-					spin_unlock_irq(&pdflush_lock);
-					start_one_pdflush_thread();
-					spin_lock_irq(&pdflush_lock);
-				}
-			}
-		}
-
-		my_work->fn = NULL;
-
-		/*
-		 * Thread destruction: For how long has the sleepiest
-		 * thread slept?
-		 */
-		if (list_empty(&pdflush_list))
-			continue;
-		if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
-			continue;
-		pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
-		if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
-			/* Limit exit rate */
-			pdf->when_i_went_to_sleep = jiffies;
-			break;					/* exeunt */
-		}
-	}
-	nr_pdflush_threads--;
-	spin_unlock_irq(&pdflush_lock);
-	return 0;
-}
-
-/*
- * Of course, my_work wants to be just a local in __pdflush().  It is
- * separated out in this manner to hopefully prevent the compiler from
- * performing unfortunate optimisations against the auto variables.  Because
- * these are visible to other tasks and CPUs.  (No problem has actually
- * been observed.  This is just paranoia).
- */
-static int pdflush(void *dummy)
-{
-	struct pdflush_work my_work;
-	cpumask_var_t cpus_allowed;
-
-	/*
-	 * Since the caller doesn't even check kthread_run() worked, let's not
-	 * freak out too much if this fails.
-	 */
-	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
-		printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
-		return 0;
-	}
-
-	/*
-	 * pdflush can spend a lot of time doing encryption via dm-crypt.  We
-	 * don't want to do that at keventd's priority.
-	 */
-	set_user_nice(current, 0);
-
-	/*
-	 * Some configs put our parent kthread in a limited cpuset,
-	 * which kthread() overrides, forcing cpus_allowed == cpu_all_mask.
-	 * Our needs are more modest - cut back to our cpusets cpus_allowed.
-	 * This is needed as pdflush's are dynamically created and destroyed.
-	 * The boottime pdflush's are easily placed w/o these 2 lines.
-	 */
-	cpuset_cpus_allowed(current, cpus_allowed);
-	set_cpus_allowed_ptr(current, cpus_allowed);
-	free_cpumask_var(cpus_allowed);
-
-	return __pdflush(&my_work);
-}
-
-/*
- * Attempt to wake up a pdflush thread, and get it to do some work for you.
- * Returns zero if it indeed managed to find a worker thread, and passed your
- * payload to it.
- */
-int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	BUG_ON(fn == NULL);	/* Hard to diagnose if it's deferred */
-
-	spin_lock_irqsave(&pdflush_lock, flags);
-	if (list_empty(&pdflush_list)) {
-		ret = -1;
-	} else {
-		struct pdflush_work *pdf;
-
-		pdf = list_entry(pdflush_list.next, struct pdflush_work, list);
-		list_del_init(&pdf->list);
-		if (list_empty(&pdflush_list))
-			last_empty_jifs = jiffies;
-		pdf->fn = fn;
-		pdf->arg0 = arg0;
-		wake_up_process(pdf->who);
-	}
-	spin_unlock_irqrestore(&pdflush_lock, flags);
-
-	return ret;
-}
-
-static void start_one_pdflush_thread(void)
-{
-	struct task_struct *k;
-
-	k = kthread_run(pdflush, NULL, "pdflush");
-	if (unlikely(IS_ERR(k))) {
-		spin_lock_irq(&pdflush_lock);
-		nr_pdflush_threads--;
-		spin_unlock_irq(&pdflush_lock);
-	}
-}
-
-static int __init pdflush_init(void)
-{
-	int i;
-
-	/*
-	 * Pre-set nr_pdflush_threads...  If we fail to create,
-	 * the count will be decremented.
-	 */
-	nr_pdflush_threads = MIN_PDFLUSH_THREADS;
-
-	for (i = 0; i < MIN_PDFLUSH_THREADS; i++)
-		start_one_pdflush_thread();
-	return 0;
-}
-
-module_init(pdflush_init);
diff --git a/mm/shmem.c b/mm/shmem.c
index d713239..5a0b3d4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2446,7 +2446,7 @@
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.permission	= shmem_permission,
+	.check_acl	= shmem_check_acl,
 #endif
 
 };
@@ -2469,7 +2469,7 @@
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.permission	= shmem_permission,
+	.check_acl	= shmem_check_acl,
 #endif
 };
 
@@ -2480,7 +2480,7 @@
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.permission	= shmem_permission,
+	.check_acl	= shmem_check_acl,
 #endif
 };
 
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index 606a8e7..df2c87f 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -157,7 +157,7 @@
 /**
  * shmem_check_acl  -  check_acl() callback for generic_permission()
  */
-static int
+int
 shmem_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS);
@@ -169,12 +169,3 @@
 	}
 	return -EAGAIN;
 }
-
-/**
- * shmem_permission  -  permission() inode operation
- */
-int
-shmem_permission(struct inode *inode, int mask)
-{
-	return generic_permission(inode, mask, shmem_check_acl);
-}
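
The shmem conversion above works because generic_permission() already
takes an ACL callback; once the VFS can find that callback through the new
->check_acl inode operation, the per-filesystem ->permission wrapper has
nothing left to do.  A sketch of the equivalence, with hypothetical demo_*
names and assuming the generic_permission() signature of this era:

	#include <linux/fs.h>

	static int demo_check_acl(struct inode *inode, int mask)
	{
		/* -EAGAIN means "no ACL opinion, fall back to mode bits" */
		return -EAGAIN;
	}

	/* what the deleted shmem_permission() wrapper amounted to */
	static int demo_permission(struct inode *inode, int mask)
	{
		return generic_permission(inode, mask, demo_check_acl);
	}
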
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 42cd38e..5ae6b8b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -34,6 +34,7 @@
 };
 
 static struct backing_dev_info swap_backing_dev_info = {
+	.name		= "swap",
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 	.unplug_io_fn	= swap_unplug_io_fn,
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 94e86dd..ba8228e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1720,7 +1720,7 @@
 		 */
 		if (total_scanned > sc->swap_cluster_max +
 					sc->swap_cluster_max / 2) {
-			wakeup_pdflush(laptop_mode ? 0 : total_scanned);
+			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
 			sc->may_writepage = 1;
 		}
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 6a94475..278d489 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1031,7 +1031,7 @@
 	dev = __dev_get_by_name(net, name);
 	read_unlock(&dev_base_lock);
 
-	if (!dev && capable(CAP_SYS_MODULE))
+	if (!dev && capable(CAP_NET_ADMIN))
 		request_module("%s", name);
 }
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index e92beb9..6428b34 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -116,7 +116,7 @@
 	spin_lock(&tcp_cong_list_lock);
 	ca = tcp_ca_find(name);
 #ifdef CONFIG_MODULES
-	if (!ca && capable(CAP_SYS_MODULE)) {
+	if (!ca && capable(CAP_NET_ADMIN)) {
 		spin_unlock(&tcp_cong_list_lock);
 
 		request_module("tcp_%s", name);
@@ -246,7 +246,7 @@
 
 #ifdef CONFIG_MODULES
 	/* not found attempt to autoload module */
-	if (!ca && capable(CAP_SYS_MODULE)) {
+	if (!ca && capable(CAP_NET_ADMIN)) {
 		rcu_read_unlock();
 		request_module("tcp_%s", name);
 		rcu_read_lock();
diff --git a/security/Makefile b/security/Makefile
index b56e7f9..95ecc06 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -16,9 +16,7 @@
 # Must precede capability.o in order to stack properly.
 obj-$(CONFIG_SECURITY_SELINUX)		+= selinux/built-in.o
 obj-$(CONFIG_SECURITY_SMACK)		+= smack/built-in.o
-ifeq ($(CONFIG_AUDIT),y)
-obj-$(CONFIG_SECURITY_SMACK)		+= lsm_audit.o
-endif
+obj-$(CONFIG_AUDIT)			+= lsm_audit.o
 obj-$(CONFIG_SECURITY_TOMOYO)		+= tomoyo/built-in.o
 obj-$(CONFIG_SECURITY_ROOTPLUG)		+= root_plug.o
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
diff --git a/security/capability.c b/security/capability.c
index 88f752e..fce07a7 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -373,6 +373,11 @@
 	return 0;
 }
 
+static int cap_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+	return 0;
+}
+
 static void cap_cred_free(struct cred *cred)
 {
 }
@@ -386,6 +391,10 @@
 {
 }
 
+static void cap_cred_transfer(struct cred *new, const struct cred *old)
+{
+}
+
 static int cap_kernel_act_as(struct cred *new, u32 secid)
 {
 	return 0;
@@ -396,6 +405,11 @@
 	return 0;
 }
 
+static int cap_kernel_module_request(void)
+{
+	return 0;
+}
+
 static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 {
 	return 0;
@@ -701,10 +715,26 @@
 {
 }
 
+
+
 static void cap_req_classify_flow(const struct request_sock *req,
 				  struct flowi *fl)
 {
 }
+
+static int cap_tun_dev_create(void)
+{
+	return 0;
+}
+
+static void cap_tun_dev_post_create(struct sock *sk)
+{
+}
+
+static int cap_tun_dev_attach(struct sock *sk)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -792,6 +822,20 @@
 {
 }
 
+static int cap_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+	return 0;
+}
+
+static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+	return 0;
+}
+
+static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+	return 0;
+}
 #ifdef CONFIG_KEYS
 static int cap_key_alloc(struct key *key, const struct cred *cred,
 			 unsigned long flags)
@@ -815,6 +859,13 @@
 	return 0;
 }
 
+static int cap_key_session_to_parent(const struct cred *cred,
+				     const struct cred *parent_cred,
+				     struct key *key)
+{
+	return 0;
+}
+
 #endif /* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
@@ -854,7 +905,7 @@
 
 void security_fixup_ops(struct security_operations *ops)
 {
-	set_to_cap_if_null(ops, ptrace_may_access);
+	set_to_cap_if_null(ops, ptrace_access_check);
 	set_to_cap_if_null(ops, ptrace_traceme);
 	set_to_cap_if_null(ops, capget);
 	set_to_cap_if_null(ops, capset);
@@ -940,11 +991,14 @@
 	set_to_cap_if_null(ops, file_receive);
 	set_to_cap_if_null(ops, dentry_open);
 	set_to_cap_if_null(ops, task_create);
+	set_to_cap_if_null(ops, cred_alloc_blank);
 	set_to_cap_if_null(ops, cred_free);
 	set_to_cap_if_null(ops, cred_prepare);
 	set_to_cap_if_null(ops, cred_commit);
+	set_to_cap_if_null(ops, cred_transfer);
 	set_to_cap_if_null(ops, kernel_act_as);
 	set_to_cap_if_null(ops, kernel_create_files_as);
+	set_to_cap_if_null(ops, kernel_module_request);
 	set_to_cap_if_null(ops, task_setuid);
 	set_to_cap_if_null(ops, task_fix_setuid);
 	set_to_cap_if_null(ops, task_setgid);
@@ -992,6 +1046,9 @@
 	set_to_cap_if_null(ops, secid_to_secctx);
 	set_to_cap_if_null(ops, secctx_to_secid);
 	set_to_cap_if_null(ops, release_secctx);
+	set_to_cap_if_null(ops, inode_notifysecctx);
+	set_to_cap_if_null(ops, inode_setsecctx);
+	set_to_cap_if_null(ops, inode_getsecctx);
 #ifdef CONFIG_SECURITY_NETWORK
 	set_to_cap_if_null(ops, unix_stream_connect);
 	set_to_cap_if_null(ops, unix_may_send);
@@ -1020,6 +1077,9 @@
 	set_to_cap_if_null(ops, inet_csk_clone);
 	set_to_cap_if_null(ops, inet_conn_established);
 	set_to_cap_if_null(ops, req_classify_flow);
+	set_to_cap_if_null(ops, tun_dev_create);
+	set_to_cap_if_null(ops, tun_dev_post_create);
+	set_to_cap_if_null(ops, tun_dev_attach);
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	set_to_cap_if_null(ops, xfrm_policy_alloc_security);
@@ -1038,6 +1098,7 @@
 	set_to_cap_if_null(ops, key_free);
 	set_to_cap_if_null(ops, key_permission);
 	set_to_cap_if_null(ops, key_getsecurity);
+	set_to_cap_if_null(ops, key_session_to_parent);
 #endif	/* CONFIG_KEYS */
 #ifdef CONFIG_AUDIT
 	set_to_cap_if_null(ops, audit_rule_init);
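
All of the set_to_cap_if_null() lines added above follow a single idiom:
any hook that the active LSM leaves NULL is pointed at the capability
default, so callers of the security hooks never need NULL checks.  In
sketch form the macro behind it looks like this (demo_default is a
hypothetical rename; the real macro sits at the top of
security/capability.c):

	#define demo_default(ops, hook)				\
		do {						\
			if (!(ops)->hook)			\
				(ops)->hook = cap_##hook;	\
		} while (0)

	/*
	 * e.g. demo_default(ops, tun_dev_create) makes the framework fall
	 * back to cap_tun_dev_create() when the LSM provides nothing.
	 */
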
diff --git a/security/commoncap.c b/security/commoncap.c
index e3097c0..fe30751 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -101,7 +101,7 @@
 }
 
 /**
- * cap_ptrace_may_access - Determine whether the current process may access
+ * cap_ptrace_access_check - Determine whether the current process may access
  *			   another
  * @child: The process to be accessed
  * @mode: The mode of attachment.
@@ -109,7 +109,7 @@
  * Determine whether a process may access another, returning 0 if permission
  * granted, -ve if denied.
  */
-int cap_ptrace_may_access(struct task_struct *child, unsigned int mode)
+int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
 {
 	int ret = 0;
 
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 747a464..74d5447 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-y := \
+	gc.o \
 	key.o \
 	keyring.o \
 	keyctl.o \
diff --git a/security/keys/compat.c b/security/keys/compat.c
index c766c68..792c0a6 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -82,6 +82,9 @@
 	case KEYCTL_GET_SECURITY:
 		return keyctl_get_security(arg2, compat_ptr(arg3), arg4);
 
+	case KEYCTL_SESSION_TO_PARENT:
+		return keyctl_session_to_parent();
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/gc.c b/security/keys/gc.c
new file mode 100644
index 0000000..1e616ae
--- /dev/null
+++ b/security/keys/gc.c
@@ -0,0 +1,194 @@
+/* Key garbage collector
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <keys/keyring-type.h>
+#include "internal.h"
+
+/*
+ * Delay between key revocation/expiry and garbage collection, in seconds
+ */
+unsigned key_gc_delay = 5 * 60;
+
+/*
+ * Reaper
+ */
+static void key_gc_timer_func(unsigned long);
+static void key_garbage_collector(struct work_struct *);
+static DEFINE_TIMER(key_gc_timer, key_gc_timer_func, 0, 0);
+static DECLARE_WORK(key_gc_work, key_garbage_collector);
+static key_serial_t key_gc_cursor; /* the last key the gc considered */
+static unsigned long key_gc_executing;
+static time_t key_gc_next_run = LONG_MAX;
+
+/*
+ * Schedule a garbage collection run
+ * - precision isn't particularly important
+ */
+void key_schedule_gc(time_t gc_at)
+{
+	unsigned long expires;
+	time_t now = current_kernel_time().tv_sec;
+
+	kenter("%ld", gc_at - now);
+
+	gc_at += key_gc_delay;
+
+	if (now >= gc_at) {
+		schedule_work(&key_gc_work);
+	} else if (gc_at < key_gc_next_run) {
+		expires = jiffies + (gc_at - now) * HZ;
+		mod_timer(&key_gc_timer, expires);
+	}
+}
+
+/*
+ * The garbage collection timer has expired, kick off a collection run
+ */
+static void key_gc_timer_func(unsigned long data)
+{
+	kenter("");
+	key_gc_next_run = LONG_MAX;
+	schedule_work(&key_gc_work);
+}
+
+/*
+ * Garbage collect pointers from a keyring
+ * - return true if we altered the keyring
+ */
+static bool key_gc_keyring(struct key *keyring, time_t limit)
+	__releases(key_serial_lock)
+{
+	struct keyring_list *klist;
+	struct key *key;
+	int loop;
+
+	kenter("%x", key_serial(keyring));
+
+	if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
+		goto dont_gc;
+
+	/* scan the keyring looking for dead keys */
+	klist = rcu_dereference(keyring->payload.subscriptions);
+	if (!klist)
+		goto dont_gc;
+
+	for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+		key = klist->keys[loop];
+		if (test_bit(KEY_FLAG_DEAD, &key->flags) ||
+		    (key->expiry > 0 && key->expiry <= limit))
+			goto do_gc;
+	}
+
+dont_gc:
+	kleave(" = false");
+	return false;
+
+do_gc:
+	key_gc_cursor = keyring->serial;
+	key_get(keyring);
+	spin_unlock(&key_serial_lock);
+	keyring_gc(keyring, limit);
+	key_put(keyring);
+	kleave(" = true");
+	return true;
+}
+
+/*
+ * Garbage collector for keys
+ * - this involves scanning the keyrings for dead, expired and revoked keys
+ *   that have overstayed their welcome
+ */
+static void key_garbage_collector(struct work_struct *work)
+{
+	struct rb_node *rb;
+	key_serial_t cursor;
+	struct key *key, *xkey;
+	time_t new_timer = LONG_MAX, limit;
+
+	kenter("");
+
+	if (test_and_set_bit(0, &key_gc_executing)) {
+		key_schedule_gc(current_kernel_time().tv_sec);
+		return;
+	}
+
+	limit = current_kernel_time().tv_sec;
+	if (limit > key_gc_delay)
+		limit -= key_gc_delay;
+	else
+		limit = key_gc_delay;
+
+	spin_lock(&key_serial_lock);
+
+	if (RB_EMPTY_ROOT(&key_serial_tree))
+		goto reached_the_end;
+
+	cursor = key_gc_cursor;
+	if (cursor < 0)
+		cursor = 0;
+
+	/* find the first key above the cursor */
+	key = NULL;
+	rb = key_serial_tree.rb_node;
+	while (rb) {
+		xkey = rb_entry(rb, struct key, serial_node);
+		if (cursor < xkey->serial) {
+			key = xkey;
+			rb = rb->rb_left;
+		} else if (cursor > xkey->serial) {
+			rb = rb->rb_right;
+		} else {
+			rb = rb_next(rb);
+			if (!rb)
+				goto reached_the_end;
+			key = rb_entry(rb, struct key, serial_node);
+			break;
+		}
+	}
+
+	if (!key)
+		goto reached_the_end;
+
+	/* trawl through the keys looking for keyrings */
+	for (;;) {
+		if (key->expiry > 0 && key->expiry < new_timer)
+			new_timer = key->expiry;
+
+		if (key->type == &key_type_keyring &&
+		    key_gc_keyring(key, limit)) {
+			/* the gc ate our lock */
+			schedule_work(&key_gc_work);
+			goto no_unlock;
+		}
+
+		rb = rb_next(&key->serial_node);
+		if (!rb) {
+			key_gc_cursor = 0;
+			break;
+		}
+		key = rb_entry(rb, struct key, serial_node);
+	}
+
+out:
+	spin_unlock(&key_serial_lock);
+no_unlock:
+	clear_bit(0, &key_gc_executing);
+	if (new_timer < LONG_MAX)
+		key_schedule_gc(new_timer);
+
+	kleave("");
+	return;
+
+reached_the_end:
+	key_gc_cursor = 0;
+	goto out;
+}
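
Note how the collector above never holds key_serial_lock for a whole
pass: key_gc_cursor remembers the last serial it processed, and the next
run restarts from the first key whose serial lies above the cursor.  The
tree search that makes the walk resumable is the classic "successor of a
value" descent; a standalone sketch with hypothetical demo_* types:

	#include <linux/rbtree.h>

	struct demo_key {
		struct rb_node node;
		int serial;
	};

	/* return the node with the smallest serial strictly above @cursor */
	static struct demo_key *demo_first_after(struct rb_root *root,
						 int cursor)
	{
		struct rb_node *rb = root->rb_node;
		struct demo_key *best = NULL;

		while (rb) {
			struct demo_key *k =
				rb_entry(rb, struct demo_key, node);

			if (cursor < k->serial) {
				best = k;	/* candidate; try smaller */
				rb = rb->rb_left;
			} else {
				rb = rb->rb_right; /* all here <= cursor */
			}
		}
		return best;
	}
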
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 9fb679c..24ba030 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -124,11 +124,18 @@
 					struct key *dest_keyring,
 					unsigned long flags);
 
-extern key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
+extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
 				 key_perm_t perm);
+#define KEY_LOOKUP_CREATE	0x01
+#define KEY_LOOKUP_PARTIAL	0x02
+#define KEY_LOOKUP_FOR_UNLINK	0x04
 
 extern long join_session_keyring(const char *name);
 
+extern unsigned key_gc_delay;
+extern void keyring_gc(struct key *keyring, time_t limit);
+extern void key_schedule_gc(time_t expiry_at);
+
 /*
  * check to see whether permission is granted to use a key in the desired way
  */
@@ -194,6 +201,7 @@
 extern long keyctl_assume_authority(key_serial_t);
 extern long keyctl_get_security(key_serial_t keyid, char __user *buffer,
 				size_t buflen);
+extern long keyctl_session_to_parent(void);
 
 /*
  * debugging key validation
diff --git a/security/keys/key.c b/security/keys/key.c
index 4a1297d..08531ad 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -500,6 +500,7 @@
 		set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
 		now = current_kernel_time();
 		key->expiry = now.tv_sec + timeout;
+		key_schedule_gc(key->expiry);
 
 		if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags))
 			awaken = 1;
@@ -642,10 +643,8 @@
 	goto error;
 
  found:
-	/* pretend it doesn't exist if it's dead */
-	if (atomic_read(&key->usage) == 0 ||
-	    test_bit(KEY_FLAG_DEAD, &key->flags) ||
-	    key->type == &key_type_dead)
+	/* pretend it doesn't exist if it is awaiting deletion */
+	if (atomic_read(&key->usage) == 0)
 		goto not_found;
 
 	/* this races with key_put(), but that doesn't matter since key_put()
@@ -890,6 +889,9 @@
  */
 void key_revoke(struct key *key)
 {
+	struct timespec now;
+	time_t time;
+
 	key_check(key);
 
 	/* make sure no one's trying to change or use the key when we mark it
@@ -902,6 +904,14 @@
 	    key->type->revoke)
 		key->type->revoke(key);
 
+	/* set the death time to no more than the expiry time */
+	now = current_kernel_time();
+	time = now.tv_sec;
+	if (key->revoked_at == 0 || key->revoked_at > time) {
+		key->revoked_at = time;
+		key_schedule_gc(key->revoked_at);
+	}
+
 	up_write(&key->sem);
 
 } /* end key_revoke() */
@@ -958,8 +968,10 @@
 	for (_n = rb_first(&key_serial_tree); _n; _n = rb_next(_n)) {
 		key = rb_entry(_n, struct key, serial_node);
 
-		if (key->type == ktype)
+		if (key->type == ktype) {
 			key->type = &key_type_dead;
+			set_bit(KEY_FLAG_DEAD, &key->flags);
+		}
 	}
 
 	spin_unlock(&key_serial_lock);
@@ -984,6 +996,8 @@
 	spin_unlock(&key_serial_lock);
 	up_write(&key_types_sem);
 
+	key_schedule_gc(0);
+
 } /* end unregister_key_type() */
 
 EXPORT_SYMBOL(unregister_key_type);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 7f09fb8..74c9685 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -103,7 +103,7 @@
 	}
 
 	/* find the target keyring (which must be writable) */
-	keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error3;
@@ -185,7 +185,8 @@
 	/* get the destination keyring if specified */
 	dest_ref = NULL;
 	if (destringid) {
-		dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE);
+		dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
+					   KEY_WRITE);
 		if (IS_ERR(dest_ref)) {
 			ret = PTR_ERR(dest_ref);
 			goto error3;
@@ -233,9 +234,11 @@
 long keyctl_get_keyring_ID(key_serial_t id, int create)
 {
 	key_ref_t key_ref;
+	unsigned long lflags;
 	long ret;
 
-	key_ref = lookup_user_key(id, create, 0, KEY_SEARCH);
+	lflags = create ? KEY_LOOKUP_CREATE : 0;
+	key_ref = lookup_user_key(id, lflags, KEY_SEARCH);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -309,7 +312,7 @@
 	}
 
 	/* find the target key (which must be writable) */
-	key_ref = lookup_user_key(id, 0, 0, KEY_WRITE);
+	key_ref = lookup_user_key(id, 0, KEY_WRITE);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error2;
@@ -337,10 +340,16 @@
 	key_ref_t key_ref;
 	long ret;
 
-	key_ref = lookup_user_key(id, 0, 0, KEY_WRITE);
+	key_ref = lookup_user_key(id, 0, KEY_WRITE);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
-		goto error;
+		if (ret != -EACCES)
+			goto error;
+		key_ref = lookup_user_key(id, 0, KEY_SETATTR);
+		if (IS_ERR(key_ref)) {
+			ret = PTR_ERR(key_ref);
+			goto error;
+		}
 	}
 
 	key_revoke(key_ref_to_ptr(key_ref));
@@ -363,7 +372,7 @@
 	key_ref_t keyring_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error;
@@ -389,13 +398,13 @@
 	key_ref_t keyring_ref, key_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error;
 	}
 
-	key_ref = lookup_user_key(id, 1, 0, KEY_LINK);
+	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_LINK);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error2;
@@ -423,13 +432,13 @@
 	key_ref_t keyring_ref, key_ref;
 	long ret;
 
-	keyring_ref = lookup_user_key(ringid, 0, 0, KEY_WRITE);
+	keyring_ref = lookup_user_key(ringid, 0, KEY_WRITE);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error;
 	}
 
-	key_ref = lookup_user_key(id, 0, 0, 0);
+	key_ref = lookup_user_key(id, KEY_LOOKUP_FOR_UNLINK, 0);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error2;
@@ -465,7 +474,7 @@
 	char *tmpbuf;
 	long ret;
 
-	key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW);
+	key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
 	if (IS_ERR(key_ref)) {
 		/* viewing a key under construction is permitted if we have the
 		 * authorisation token handy */
@@ -474,7 +483,8 @@
 			if (!IS_ERR(instkey)) {
 				key_put(instkey);
 				key_ref = lookup_user_key(keyid,
-							  0, 1, 0);
+							  KEY_LOOKUP_PARTIAL,
+							  0);
 				if (!IS_ERR(key_ref))
 					goto okay;
 			}
@@ -558,7 +568,7 @@
 	}
 
 	/* get the keyring at which to begin the search */
-	keyring_ref = lookup_user_key(ringid, 0, 0, KEY_SEARCH);
+	keyring_ref = lookup_user_key(ringid, 0, KEY_SEARCH);
 	if (IS_ERR(keyring_ref)) {
 		ret = PTR_ERR(keyring_ref);
 		goto error2;
@@ -567,7 +577,8 @@
 	/* get the destination keyring if specified */
 	dest_ref = NULL;
 	if (destringid) {
-		dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE);
+		dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
+					   KEY_WRITE);
 		if (IS_ERR(dest_ref)) {
 			ret = PTR_ERR(dest_ref);
 			goto error3;
@@ -637,7 +648,7 @@
 	long ret;
 
 	/* find the key first */
-	key_ref = lookup_user_key(keyid, 0, 0, 0);
+	key_ref = lookup_user_key(keyid, 0, 0);
 	if (IS_ERR(key_ref)) {
 		ret = -ENOKEY;
 		goto error;
@@ -700,7 +711,8 @@
 	if (uid == (uid_t) -1 && gid == (gid_t) -1)
 		goto error;
 
-	key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+				  KEY_SETATTR);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -805,7 +817,8 @@
 	if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL))
 		goto error;
 
-	key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+				  KEY_SETATTR);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -847,7 +860,7 @@
 
 	/* if a specific keyring is nominated by ID, then use that */
 	if (ringid > 0) {
-		dkref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+		dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
 		if (IS_ERR(dkref))
 			return PTR_ERR(dkref);
 		*_dest_keyring = key_ref_to_ptr(dkref);
@@ -1083,7 +1096,8 @@
 	time_t expiry;
 	long ret;
 
-	key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+	key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+				  KEY_SETATTR);
 	if (IS_ERR(key_ref)) {
 		ret = PTR_ERR(key_ref);
 		goto error;
@@ -1101,6 +1115,7 @@
 	}
 
 	key->expiry = expiry;
+	key_schedule_gc(key->expiry);
 
 	up_write(&key->sem);
 	key_put(key);
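
The key_schedule_gc() call added above means that setting a timeout from
userspace now also arms the garbage collector.  A minimal exercise of the
path, assuming the keyutils userspace library is available (this program is
illustrative, not part of the patch):

	#include <stdio.h>
	#include <keyutils.h>

	int main(void)
	{
		/* create a throwaway user key in the session keyring */
		key_serial_t key = add_key("user", "gc-demo", "payload", 7,
					   KEY_SPEC_SESSION_KEYRING);
		if (key < 0) {
			perror("add_key");
			return 1;
		}
		/* expire it in 60s; it becomes collectable another
		 * /proc/sys/kernel/keys/gc_delay seconds after that */
		if (keyctl(KEYCTL_SET_TIMEOUT, key, 60) < 0) {
			perror("keyctl(KEYCTL_SET_TIMEOUT)");
			return 1;
		}
		return 0;
	}
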
@@ -1170,7 +1185,7 @@
 	char *context;
 	long ret;
 
-	key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW);
+	key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
 	if (IS_ERR(key_ref)) {
 		if (PTR_ERR(key_ref) != -EACCES)
 			return PTR_ERR(key_ref);
@@ -1182,7 +1197,7 @@
 			return PTR_ERR(key_ref);
 		key_put(instkey);
 
-		key_ref = lookup_user_key(keyid, 0, 1, 0);
+		key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, 0);
 		if (IS_ERR(key_ref))
 			return PTR_ERR(key_ref);
 	}
@@ -1213,6 +1228,105 @@
 	return ret;
 }
 
+/*
+ * attempt to install the calling process's session keyring on the process's
+ * parent process
+ * - the keyring must exist and must grant us LINK permission
+ * - implements keyctl(KEYCTL_SESSION_TO_PARENT)
+ */
+long keyctl_session_to_parent(void)
+{
+	struct task_struct *me, *parent;
+	const struct cred *mycred, *pcred;
+	struct cred *cred, *oldcred;
+	key_ref_t keyring_r;
+	int ret;
+
+	keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_LINK);
+	if (IS_ERR(keyring_r))
+		return PTR_ERR(keyring_r);
+
+	/* our parent is going to need a new cred struct, a new tgcred struct
+	 * and new security data, so we allocate them here to prevent ENOMEM in
+	 * our parent */
+	ret = -ENOMEM;
+	cred = cred_alloc_blank();
+	if (!cred)
+		goto error_keyring;
+
+	cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r);
+	keyring_r = NULL;
+
+	me = current;
+	write_lock_irq(&tasklist_lock);
+
+	parent = me->real_parent;
+	ret = -EPERM;
+
+	/* the parent mustn't be init and mustn't be a kernel thread */
+	if (parent->pid <= 1 || !parent->mm)
+		goto not_permitted;
+
+	/* the parent must be single threaded */
+	if (atomic_read(&parent->signal->count) != 1)
+		goto not_permitted;
+
+	/* the parent and the child must have different session keyrings or
+	 * there's no point */
+	mycred = current_cred();
+	pcred = __task_cred(parent);
+	if (mycred == pcred ||
+	    mycred->tgcred->session_keyring == pcred->tgcred->session_keyring)
+		goto already_same;
+
+	/* the parent must have the same effective ownership and mustn't be
+	 * SUID/SGID */
+	if (pcred-> uid	!= mycred->euid	||
+	    pcred->euid	!= mycred->euid	||
+	    pcred->suid	!= mycred->euid	||
+	    pcred-> gid	!= mycred->egid	||
+	    pcred->egid	!= mycred->egid	||
+	    pcred->sgid	!= mycred->egid)
+		goto not_permitted;
+
+	/* the keyrings must have the same UID */
+	if (pcred ->tgcred->session_keyring->uid != mycred->euid ||
+	    mycred->tgcred->session_keyring->uid != mycred->euid)
+		goto not_permitted;
+
+	/* the LSM must permit the replacement of the parent's keyring with the
+	 * keyring from this process */
+	ret = security_key_session_to_parent(mycred, pcred,
+					     key_ref_to_ptr(keyring_r));
+	if (ret < 0)
+		goto not_permitted;
+
+	/* if there's an already pending keyring replacement, then we replace
+	 * that */
+	oldcred = parent->replacement_session_keyring;
+
+	/* the replacement session keyring is applied just prior to userspace
+	 * restarting */
+	parent->replacement_session_keyring = cred;
+	cred = NULL;
+	set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME);
+
+	write_unlock_irq(&tasklist_lock);
+	if (oldcred)
+		put_cred(oldcred);
+	return 0;
+
+already_same:
+	ret = 0;
+not_permitted:
+	put_cred(cred);
+	return ret;
+
+error_keyring:
+	key_ref_put(keyring_r);
+	return ret;
+}
+
 /*****************************************************************************/
 /*
  * the key control system call
@@ -1298,6 +1412,9 @@
 					   (char __user *) arg3,
 					   (size_t) arg4);
 
+	case KEYCTL_SESSION_TO_PARENT:
+		return keyctl_session_to_parent();
+
 	default:
 		return -EOPNOTSUPP;
 	}
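
A hypothetical userspace use of the new command, showing what a "new session"
helper would do (assumes the keyutils library; this program is not part of
the patch):

	#include <stdio.h>
	#include <keyutils.h>

	int main(void)
	{
		/* join a fresh anonymous session keyring in this process */
		if (keyctl_join_session_keyring(NULL) < 0) {
			perror("keyctl_join_session_keyring");
			return 1;
		}
		/* ask the kernel to install it on our parent (the shell);
		 * it takes effect when the parent next returns to userspace */
		if (keyctl(KEYCTL_SESSION_TO_PARENT) < 0) {
			perror("keyctl(KEYCTL_SESSION_TO_PARENT)");
			return 1;
		}
		return 0;
	}
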
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 3dba81c..ac977f6 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -1000,3 +1000,88 @@
 	}
 
 } /* end keyring_revoke() */
+
+/*
+ * Determine whether a key is dead
+ */
+static bool key_is_dead(struct key *key, time_t limit)
+{
+	return test_bit(KEY_FLAG_DEAD, &key->flags) ||
+		(key->expiry > 0 && key->expiry <= limit);
+}
+
+/*
+ * Collect garbage from the contents of a keyring
+ */
+void keyring_gc(struct key *keyring, time_t limit)
+{
+	struct keyring_list *klist, *new;
+	struct key *key;
+	int loop, keep, max;
+
+	kenter("%x", key_serial(keyring));
+
+	down_write(&keyring->sem);
+
+	klist = keyring->payload.subscriptions;
+	if (!klist)
+		goto just_return;
+
+	/* work out how many subscriptions we're keeping */
+	keep = 0;
+	for (loop = klist->nkeys - 1; loop >= 0; loop--)
+		if (!key_is_dead(klist->keys[loop], limit))
+			keep++;
+
+	if (keep == klist->nkeys)
+		goto just_return;
+
+	/* allocate a new keyring payload */
+	max = roundup(keep, 4);
+	new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
+		      GFP_KERNEL);
+	if (!new)
+		goto just_return;
+	new->maxkeys = max;
+	new->nkeys = 0;
+	new->delkey = 0;
+
+	/* install the live keys
+	 * - must take care as expired keys may be updated back to life
+	 */
+	keep = 0;
+	for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+		key = klist->keys[loop];
+		if (!key_is_dead(key, limit)) {
+			if (keep >= max)
+				goto discard_new;
+			new->keys[keep++] = key_get(key);
+		}
+	}
+	new->nkeys = keep;
+
+	/* adjust the quota */
+	key_payload_reserve(keyring,
+			    sizeof(struct keyring_list) +
+			    KEYQUOTA_LINK_BYTES * keep);
+
+	if (keep == 0) {
+		rcu_assign_pointer(keyring->payload.subscriptions, NULL);
+		kfree(new);
+	} else {
+		rcu_assign_pointer(keyring->payload.subscriptions, new);
+	}
+
+	up_write(&keyring->sem);
+
+	call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+	kleave(" [yes]");
+	return;
+
+discard_new:
+	new->nkeys = keep;
+	keyring_clear_rcu_disposal(&new->rcu);
+just_return:
+	up_write(&keyring->sem);
+	kleave(" [no]");
+}
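
For orientation, the limit passed in comes from the collector in
security/keys/gc.c, which is not part of this hunk; its derivation is roughly
the following sketch:

	/* sketch: keys that expired more than key_gc_delay seconds ago
	 * count as dead; clamp so the subtraction cannot go negative */
	time_t limit = current_kernel_time().tv_sec;

	if (limit > key_gc_delay)
		limit -= key_gc_delay;
	else
		limit = key_gc_delay;

	keyring_gc(keyring, limit);
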
diff --git a/security/keys/proc.c b/security/keys/proc.c
index 769f9bd..9d01021 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -91,59 +91,94 @@
  */
 #ifdef CONFIG_KEYS_DEBUG_PROC_KEYS
 
-static struct rb_node *__key_serial_next(struct rb_node *n)
+static struct rb_node *key_serial_next(struct rb_node *n)
 {
+	struct user_namespace *user_ns = current_user_ns();
+
+	n = rb_next(n);
 	while (n) {
 		struct key *key = rb_entry(n, struct key, serial_node);
-		if (key->user->user_ns == current_user_ns())
+		if (key->user->user_ns == user_ns)
 			break;
 		n = rb_next(n);
 	}
 	return n;
 }
 
-static struct rb_node *key_serial_next(struct rb_node *n)
-{
-	return __key_serial_next(rb_next(n));
-}
-
-static struct rb_node *key_serial_first(struct rb_root *r)
-{
-	struct rb_node *n = rb_first(r);
-	return __key_serial_next(n);
-}
-
 static int proc_keys_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &proc_keys_ops);
+}
 
+static struct key *find_ge_key(key_serial_t id)
+{
+	struct user_namespace *user_ns = current_user_ns();
+	struct rb_node *n = key_serial_tree.rb_node;
+	struct key *minkey = NULL;
+
+	while (n) {
+		struct key *key = rb_entry(n, struct key, serial_node);
+		if (id < key->serial) {
+			if (!minkey || minkey->serial > key->serial)
+				minkey = key;
+			n = n->rb_left;
+		} else if (id > key->serial) {
+			n = n->rb_right;
+		} else {
+			minkey = key;
+			break;
+		}
+		key = NULL;
+	}
+
+	if (!minkey)
+		return NULL;
+
+	for (;;) {
+		if (minkey->user->user_ns == user_ns)
+			return minkey;
+		n = rb_next(&minkey->serial_node);
+		if (!n)
+			return NULL;
+		minkey = rb_entry(n, struct key, serial_node);
+	}
 }
 
 static void *proc_keys_start(struct seq_file *p, loff_t *_pos)
+	__acquires(key_serial_lock)
 {
-	struct rb_node *_p;
-	loff_t pos = *_pos;
+	key_serial_t pos = *_pos;
+	struct key *key;
 
 	spin_lock(&key_serial_lock);
 
-	_p = key_serial_first(&key_serial_tree);
-	while (pos > 0 && _p) {
-		pos--;
-		_p = key_serial_next(_p);
-	}
+	if (*_pos > INT_MAX)
+		return NULL;
+	key = find_ge_key(pos);
+	if (!key)
+		return NULL;
+	*_pos = key->serial;
+	return &key->serial_node;
+}
 
-	return _p;
-
+static inline key_serial_t key_node_serial(struct rb_node *n)
+{
+	struct key *key = rb_entry(n, struct key, serial_node);
+	return key->serial;
 }
 
 static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos)
 {
-	(*_pos)++;
-	return key_serial_next((struct rb_node *) v);
+	struct rb_node *n;
 
+	n = key_serial_next(v);
+	if (n)
+		*_pos = key_node_serial(n);
+	return n;
 }
 
 static void proc_keys_stop(struct seq_file *p, void *v)
+	__releases(key_serial_lock)
 {
 	spin_unlock(&key_serial_lock);
 }
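
The net effect of this rework is that the seq_file position is now the key
serial number rather than an ordinal index, so keys that vanish between reads
(for instance to the new garbage collector) can no longer shift later entries
out from under the iterator.  find_ge_key() is a lower-bound search over the
serial tree; the same invariant on a sorted array, as a self-contained
illustration:

	#include <assert.h>

	/* index of first element >= id, or n if none (mirrors find_ge_key) */
	static int lower_bound(const int *a, int n, int id)
	{
		int lo = 0, hi = n;

		while (lo < hi) {
			int mid = lo + (hi - lo) / 2;
			if (a[mid] < id)
				lo = mid + 1;
			else
				hi = mid;
		}
		return lo;
	}

	int main(void)
	{
		int serials[] = { 3, 7, 19, 204 };

		assert(lower_bound(serials, 4, 8) == 2);   /* resume at serial 19 */
		assert(lower_bound(serials, 4, 500) == 4); /* past the end: EOF */
		return 0;
	}
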
@@ -174,11 +209,9 @@
 	/* come up with a suitable timeout value */
 	if (key->expiry == 0) {
 		memcpy(xbuf, "perm", 5);
-	}
-	else if (now.tv_sec >= key->expiry) {
+	} else if (now.tv_sec >= key->expiry) {
 		memcpy(xbuf, "expd", 5);
-	}
-	else {
+	} else {
 		timo = key->expiry - now.tv_sec;
 
 		if (timo < 60)
@@ -218,9 +251,7 @@
 	seq_putc(m, '\n');
 
 	rcu_read_unlock();
-
 	return 0;
-
 }
 
 #endif /* CONFIG_KEYS_DEBUG_PROC_KEYS */
@@ -246,6 +277,7 @@
 	struct rb_node *n = rb_first(r);
 	return __key_user_next(n);
 }
+
 /*****************************************************************************/
 /*
  * implement "/proc/key-users" to provides a list of the key users
@@ -253,10 +285,10 @@
 static int proc_key_users_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &proc_key_users_ops);
-
 }
 
 static void *proc_key_users_start(struct seq_file *p, loff_t *_pos)
+	__acquires(key_user_lock)
 {
 	struct rb_node *_p;
 	loff_t pos = *_pos;
@@ -270,17 +302,16 @@
 	}
 
 	return _p;
-
 }
 
 static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos)
 {
 	(*_pos)++;
 	return key_user_next((struct rb_node *) v);
-
 }
 
 static void proc_key_users_stop(struct seq_file *p, void *v)
+	__releases(key_user_lock)
 {
 	spin_unlock(&key_user_lock);
 }
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 276d278..5c23afb 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -17,6 +17,7 @@
 #include <linux/fs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/security.h>
 #include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 #include "internal.h"
@@ -487,7 +488,7 @@
  * - don't create special keyrings unless so requested
  * - partially constructed keys aren't found unless requested
  */
-key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
+key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
 			  key_perm_t perm)
 {
 	struct request_key_auth *rka;
@@ -503,7 +504,7 @@
 	switch (id) {
 	case KEY_SPEC_THREAD_KEYRING:
 		if (!cred->thread_keyring) {
-			if (!create)
+			if (!(lflags & KEY_LOOKUP_CREATE))
 				goto error;
 
 			ret = install_thread_keyring();
@@ -521,7 +522,7 @@
 
 	case KEY_SPEC_PROCESS_KEYRING:
 		if (!cred->tgcred->process_keyring) {
-			if (!create)
+			if (!(lflags & KEY_LOOKUP_CREATE))
 				goto error;
 
 			ret = install_process_keyring();
@@ -642,7 +643,14 @@
 		break;
 	}
 
-	if (!partial) {
+	/* unlink does not use the nominated key in any way, so can skip all
+	 * the permission checks as it is only concerned with the keyring */
+	if (lflags & KEY_LOOKUP_FOR_UNLINK) {
+		ret = 0;
+		goto error;
+	}
+
+	if (!(lflags & KEY_LOOKUP_PARTIAL)) {
 		ret = wait_for_key_construction(key, true);
 		switch (ret) {
 		case -ERESTARTSYS:
@@ -660,7 +668,8 @@
 	}
 
 	ret = -EIO;
-	if (!partial && !test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
+	if (!(lflags & KEY_LOOKUP_PARTIAL) &&
+	    !test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
 		goto invalid_key;
 
 	/* check the permissions */
@@ -702,7 +711,7 @@
 	/* only permit this if there's a single thread in the thread group -
 	 * this avoids us having to adjust the creds on all threads and risking
 	 * ENOMEM */
-	if (!is_single_threaded(current))
+	if (!current_is_single_threaded())
 		return -EMLINK;
 
 	new = prepare_creds();
@@ -760,3 +769,51 @@
 	abort_creds(new);
 	return ret;
 }
+
+/*
+ * Replace a process's session keyring when that process resumes userspace on
+ * behalf of one of its children
+ */
+void key_replace_session_keyring(void)
+{
+	const struct cred *old;
+	struct cred *new;
+
+	if (!current->replacement_session_keyring)
+		return;
+
+	write_lock_irq(&tasklist_lock);
+	new = current->replacement_session_keyring;
+	current->replacement_session_keyring = NULL;
+	write_unlock_irq(&tasklist_lock);
+
+	if (!new)
+		return;
+
+	old = current_cred();
+	new->  uid	= old->  uid;
+	new-> euid	= old-> euid;
+	new-> suid	= old-> suid;
+	new->fsuid	= old->fsuid;
+	new->  gid	= old->  gid;
+	new-> egid	= old-> egid;
+	new-> sgid	= old-> sgid;
+	new->fsgid	= old->fsgid;
+	new->user	= get_uid(old->user);
+	new->group_info	= get_group_info(old->group_info);
+
+	new->securebits	= old->securebits;
+	new->cap_inheritable	= old->cap_inheritable;
+	new->cap_permitted	= old->cap_permitted;
+	new->cap_effective	= old->cap_effective;
+	new->cap_bset		= old->cap_bset;
+
+	new->jit_keyring	= old->jit_keyring;
+	new->thread_keyring	= key_get(old->thread_keyring);
+	new->tgcred->tgid	= old->tgcred->tgid;
+	new->tgcred->process_keyring = key_get(old->tgcred->process_keyring);
+
+	security_transfer_creds(new, old);
+
+	commit_creds(new);
+}
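
key_replace_session_keyring() is driven from the return-to-userspace path; the
companion change elsewhere in this series (include/linux/tracehook.h) has
roughly this shape (sketch, not quoted verbatim):

	static inline void tracehook_notify_resume(struct pt_regs *regs)
	{
		if (current->replacement_session_keyring)
			key_replace_session_keyring();
	}
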
diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c
index b611d49..5e05dc0 100644
--- a/security/keys/sysctl.c
+++ b/security/keys/sysctl.c
@@ -13,6 +13,8 @@
 #include <linux/sysctl.h>
 #include "internal.h"
 
+static const int zero, one = 1, max = INT_MAX;
+
 ctl_table key_sysctls[] = {
 	{
 		.ctl_name = CTL_UNNUMBERED,
@@ -20,7 +22,9 @@
 		.data = &key_quota_maxkeys,
 		.maxlen = sizeof(unsigned),
 		.mode = 0644,
-		.proc_handler = &proc_dointvec,
+		.proc_handler = &proc_dointvec_minmax,
+		.extra1 = (void *) &one,
+		.extra2 = (void *) &max,
 	},
 	{
 		.ctl_name = CTL_UNNUMBERED,
@@ -28,7 +32,9 @@
 		.data = &key_quota_maxbytes,
 		.maxlen = sizeof(unsigned),
 		.mode = 0644,
-		.proc_handler = &proc_dointvec,
+		.proc_handler = &proc_dointvec_minmax,
+		.extra1 = (void *) &one,
+		.extra2 = (void *) &max,
 	},
 	{
 		.ctl_name = CTL_UNNUMBERED,
@@ -36,7 +42,9 @@
 		.data = &key_quota_root_maxkeys,
 		.maxlen = sizeof(unsigned),
 		.mode = 0644,
-		.proc_handler = &proc_dointvec,
+		.proc_handler = &proc_dointvec_minmax,
+		.extra1 = (void *) &one,
+		.extra2 = (void *) &max,
 	},
 	{
 		.ctl_name = CTL_UNNUMBERED,
@@ -44,7 +52,19 @@
 		.data = &key_quota_root_maxbytes,
 		.maxlen = sizeof(unsigned),
 		.mode = 0644,
-		.proc_handler = &proc_dointvec,
+		.proc_handler = &proc_dointvec_minmax,
+		.extra1 = (void *) &one,
+		.extra2 = (void *) &max,
+	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "gc_delay",
+		.data = &key_gc_delay,
+		.maxlen = sizeof(unsigned),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.extra1 = (void *) &zero,
+		.extra2 = (void *) &max,
 	},
 	{ .ctl_name = 0 }
 };
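
With proc_dointvec_minmax in place, out-of-range writes to these files now
fail with EINVAL rather than being accepted silently.  A hypothetical
userspace poke at the new gc_delay knob:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/keys/gc_delay", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		/* collect revoked/expired keys five minutes after they die;
		 * a negative value would now be rejected with EINVAL */
		if (fprintf(f, "300\n") < 0)
			perror("fprintf");
		return fclose(f) ? 1 : 0;
	}
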
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 94b8684..500aad0 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -220,6 +220,8 @@
 	}
 
 	switch (a->type) {
+	case LSM_AUDIT_NO_AUDIT:
+		return;
 	case LSM_AUDIT_DATA_IPC:
 		audit_log_format(ab, " key=%d ", a->u.ipc_id);
 		break;
diff --git a/security/security.c b/security/security.c
index dc7674f..c4c6732 100644
--- a/security/security.c
+++ b/security/security.c
@@ -124,9 +124,9 @@
 
 /* Security operations */
 
-int security_ptrace_may_access(struct task_struct *child, unsigned int mode)
+int security_ptrace_access_check(struct task_struct *child, unsigned int mode)
 {
-	return security_ops->ptrace_may_access(child, mode);
+	return security_ops->ptrace_access_check(child, mode);
 }
 
 int security_ptrace_traceme(struct task_struct *parent)
@@ -684,6 +684,11 @@
 	return security_ops->task_create(clone_flags);
 }
 
+int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+	return security_ops->cred_alloc_blank(cred, gfp);
+}
+
 void security_cred_free(struct cred *cred)
 {
 	security_ops->cred_free(cred);
@@ -699,6 +704,11 @@
 	security_ops->cred_commit(new, old);
 }
 
+void security_transfer_creds(struct cred *new, const struct cred *old)
+{
+	security_ops->cred_transfer(new, old);
+}
+
 int security_kernel_act_as(struct cred *new, u32 secid)
 {
 	return security_ops->kernel_act_as(new, secid);
@@ -709,6 +719,11 @@
 	return security_ops->kernel_create_files_as(new, inode);
 }
 
+int security_kernel_module_request(void)
+{
+	return security_ops->kernel_module_request();
+}
+
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 {
 	return security_ops->task_setuid(id0, id1, id2, flags);
@@ -959,6 +974,24 @@
 }
 EXPORT_SYMBOL(security_release_secctx);
 
+int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+	return security_ops->inode_notifysecctx(inode, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_notifysecctx);
+
+int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+	return security_ops->inode_setsecctx(dentry, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_setsecctx);
+
+int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+	return security_ops->inode_getsecctx(inode, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_getsecctx);
+
 #ifdef CONFIG_SECURITY_NETWORK
 
 int security_unix_stream_connect(struct socket *sock, struct socket *other,
@@ -1112,6 +1145,24 @@
 	security_ops->inet_conn_established(sk, skb);
 }
 
+int security_tun_dev_create(void)
+{
+	return security_ops->tun_dev_create();
+}
+EXPORT_SYMBOL(security_tun_dev_create);
+
+void security_tun_dev_post_create(struct sock *sk)
+{
+	return security_ops->tun_dev_post_create(sk);
+}
+EXPORT_SYMBOL(security_tun_dev_post_create);
+
+int security_tun_dev_attach(struct sock *sk)
+{
+	return security_ops->tun_dev_attach(sk);
+}
+EXPORT_SYMBOL(security_tun_dev_attach);
+
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1218,6 +1269,13 @@
 	return security_ops->key_getsecurity(key, _buffer);
 }
 
+int security_key_session_to_parent(const struct cred *cred,
+				   const struct cred *parent_cred,
+				   struct key *key)
+{
+	return security_ops->key_session_to_parent(cred, parent_cred, key);
+}
+
 #endif	/* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index b2ab608..e3d1901 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -137,7 +137,7 @@
  * @tclass: target security class
  * @av: access vector
  */
-void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
+static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
 {
 	const char **common_pts = NULL;
 	u32 common_base = 0;
@@ -492,23 +492,35 @@
 	return node;
 }
 
-static inline void avc_print_ipv6_addr(struct audit_buffer *ab,
-				       struct in6_addr *addr, __be16 port,
-				       char *name1, char *name2)
+/**
+ * avc_audit_pre_callback - SELinux specific information
+ * will be called by generic audit code
+ * @ab: the audit buffer
+ * @a: audit_data
+ */
+static void avc_audit_pre_callback(struct audit_buffer *ab, void *a)
 {
-	if (!ipv6_addr_any(addr))
-		audit_log_format(ab, " %s=%pI6", name1, addr);
-	if (port)
-		audit_log_format(ab, " %s=%d", name2, ntohs(port));
+	struct common_audit_data *ad = a;
+	audit_log_format(ab, "avc:  %s ",
+			 ad->selinux_audit_data.denied ? "denied" : "granted");
+	avc_dump_av(ab, ad->selinux_audit_data.tclass,
+			ad->selinux_audit_data.audited);
+	audit_log_format(ab, " for ");
 }
 
-static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
-				       __be16 port, char *name1, char *name2)
+/**
+ * avc_audit_post_callback - SELinux specific information
+ * will be called by generic audit code
+ * @ab: the audit buffer
+ * @a: audit_data
+ */
+static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
 {
-	if (addr)
-		audit_log_format(ab, " %s=%pI4", name1, &addr);
-	if (port)
-		audit_log_format(ab, " %s=%d", name2, ntohs(port));
+	struct common_audit_data *ad = a;
+	audit_log_format(ab, " ");
+	avc_dump_query(ab, ad->selinux_audit_data.ssid,
+			   ad->selinux_audit_data.tsid,
+			   ad->selinux_audit_data.tclass);
 }
 
 /**
@@ -532,13 +544,10 @@
  */
 void avc_audit(u32 ssid, u32 tsid,
 	       u16 tclass, u32 requested,
-	       struct av_decision *avd, int result, struct avc_audit_data *a)
+	       struct av_decision *avd, int result, struct common_audit_data *a)
 {
-	struct task_struct *tsk = current;
-	struct inode *inode = NULL;
+	struct common_audit_data stack_data;
 	u32 denied, audited;
-	struct audit_buffer *ab;
-
 	denied = requested & ~avd->allowed;
 	if (denied) {
 		audited = denied;
@@ -551,144 +560,20 @@
 		if (!(audited & avd->auditallow))
 			return;
 	}
-
-	ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
-	if (!ab)
-		return;		/* audit_panic has been called */
-	audit_log_format(ab, "avc:  %s ", denied ? "denied" : "granted");
-	avc_dump_av(ab, tclass, audited);
-	audit_log_format(ab, " for ");
-	if (a && a->tsk)
-		tsk = a->tsk;
-	if (tsk && tsk->pid) {
-		audit_log_format(ab, " pid=%d comm=", tsk->pid);
-		audit_log_untrustedstring(ab, tsk->comm);
+	if (!a) {
+		a = &stack_data;
+		memset(a, 0, sizeof(*a));
+		a->type = LSM_AUDIT_NO_AUDIT;
 	}
-	if (a) {
-		switch (a->type) {
-		case AVC_AUDIT_DATA_IPC:
-			audit_log_format(ab, " key=%d", a->u.ipc_id);
-			break;
-		case AVC_AUDIT_DATA_CAP:
-			audit_log_format(ab, " capability=%d", a->u.cap);
-			break;
-		case AVC_AUDIT_DATA_FS:
-			if (a->u.fs.path.dentry) {
-				struct dentry *dentry = a->u.fs.path.dentry;
-				if (a->u.fs.path.mnt) {
-					audit_log_d_path(ab, "path=",
-							 &a->u.fs.path);
-				} else {
-					audit_log_format(ab, " name=");
-					audit_log_untrustedstring(ab, dentry->d_name.name);
-				}
-				inode = dentry->d_inode;
-			} else if (a->u.fs.inode) {
-				struct dentry *dentry;
-				inode = a->u.fs.inode;
-				dentry = d_find_alias(inode);
-				if (dentry) {
-					audit_log_format(ab, " name=");
-					audit_log_untrustedstring(ab, dentry->d_name.name);
-					dput(dentry);
-				}
-			}
-			if (inode)
-				audit_log_format(ab, " dev=%s ino=%lu",
-						 inode->i_sb->s_id,
-						 inode->i_ino);
-			break;
-		case AVC_AUDIT_DATA_NET:
-			if (a->u.net.sk) {
-				struct sock *sk = a->u.net.sk;
-				struct unix_sock *u;
-				int len = 0;
-				char *p = NULL;
-
-				switch (sk->sk_family) {
-				case AF_INET: {
-					struct inet_sock *inet = inet_sk(sk);
-
-					avc_print_ipv4_addr(ab, inet->rcv_saddr,
-							    inet->sport,
-							    "laddr", "lport");
-					avc_print_ipv4_addr(ab, inet->daddr,
-							    inet->dport,
-							    "faddr", "fport");
-					break;
-				}
-				case AF_INET6: {
-					struct inet_sock *inet = inet_sk(sk);
-					struct ipv6_pinfo *inet6 = inet6_sk(sk);
-
-					avc_print_ipv6_addr(ab, &inet6->rcv_saddr,
-							    inet->sport,
-							    "laddr", "lport");
-					avc_print_ipv6_addr(ab, &inet6->daddr,
-							    inet->dport,
-							    "faddr", "fport");
-					break;
-				}
-				case AF_UNIX:
-					u = unix_sk(sk);
-					if (u->dentry) {
-						struct path path = {
-							.dentry = u->dentry,
-							.mnt = u->mnt
-						};
-						audit_log_d_path(ab, "path=",
-								 &path);
-						break;
-					}
-					if (!u->addr)
-						break;
-					len = u->addr->len-sizeof(short);
-					p = &u->addr->name->sun_path[0];
-					audit_log_format(ab, " path=");
-					if (*p)
-						audit_log_untrustedstring(ab, p);
-					else
-						audit_log_n_hex(ab, p, len);
-					break;
-				}
-			}
-
-			switch (a->u.net.family) {
-			case AF_INET:
-				avc_print_ipv4_addr(ab, a->u.net.v4info.saddr,
-						    a->u.net.sport,
-						    "saddr", "src");
-				avc_print_ipv4_addr(ab, a->u.net.v4info.daddr,
-						    a->u.net.dport,
-						    "daddr", "dest");
-				break;
-			case AF_INET6:
-				avc_print_ipv6_addr(ab, &a->u.net.v6info.saddr,
-						    a->u.net.sport,
-						    "saddr", "src");
-				avc_print_ipv6_addr(ab, &a->u.net.v6info.daddr,
-						    a->u.net.dport,
-						    "daddr", "dest");
-				break;
-			}
-			if (a->u.net.netif > 0) {
-				struct net_device *dev;
-
-				/* NOTE: we always use init's namespace */
-				dev = dev_get_by_index(&init_net,
-						       a->u.net.netif);
-				if (dev) {
-					audit_log_format(ab, " netif=%s",
-							 dev->name);
-					dev_put(dev);
-				}
-			}
-			break;
-		}
-	}
-	audit_log_format(ab, " ");
-	avc_dump_query(ab, ssid, tsid, tclass);
-	audit_log_end(ab);
+	a->selinux_audit_data.tclass = tclass;
+	a->selinux_audit_data.requested = requested;
+	a->selinux_audit_data.ssid = ssid;
+	a->selinux_audit_data.tsid = tsid;
+	a->selinux_audit_data.audited = audited;
+	a->selinux_audit_data.denied = denied;
+	a->lsm_pre_audit = avc_audit_pre_callback;
+	a->lsm_post_audit = avc_audit_post_callback;
+	common_lsm_audit(a);
 }
 
 /**
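
The two callbacks installed above bracket the LSM-agnostic part of the record;
the driver in security/lsm_audit.c has roughly this shape (abridged sketch):

	void common_lsm_audit(struct common_audit_data *a)
	{
		struct audit_buffer *ab;

		if (a == NULL)
			return;
		/* GFP_ATOMIC: this can run in non-sleeping contexts */
		ab = audit_log_start(current->audit_context, GFP_ATOMIC,
				     AUDIT_AVC);
		if (ab == NULL)
			return;

		if (a->lsm_pre_audit)
			a->lsm_pre_audit(ab, a);  /* "avc: denied { ... } for" */

		dump_common_audit_data(ab, a);    /* pid/comm, path, net, ipc */

		if (a->lsm_post_audit)
			a->lsm_post_audit(ab, a); /* ssid/tsid/tclass dump */

		audit_log_end(ab);
	}
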
@@ -956,7 +841,7 @@
  * another -errno upon other errors.
  */
 int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
-		 u32 requested, struct avc_audit_data *auditdata)
+		 u32 requested, struct common_audit_data *auditdata)
 {
 	struct av_decision avd;
 	int rc;
@@ -970,3 +855,9 @@
 {
 	return avc_cache.latest_notif;
 }
+
+void avc_disable(void)
+{
+	if (avc_node_cachep)
+		kmem_cache_destroy(avc_node_cachep);
+}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8d8b69c..417f7c9 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -13,8 +13,8 @@
  *					   Eric Paris <eparis@redhat.com>
  *  Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
  *			    <dgoeddel@trustedcs.com>
- *  Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P.
- *		Paul Moore <paul.moore@hp.com>
+ *  Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P.
+ *	Paul Moore <paul.moore@hp.com>
  *  Copyright (C) 2007 Hitachi Software Engineering Co., Ltd.
  *		       Yuichi Nakamura <ynakam@hitachisoft.jp>
  *
@@ -448,6 +448,10 @@
 	    sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
 		sbsec->flags &= ~SE_SBLABELSUPP;
 
+	/* Special handling for sysfs. Is genfs but also has setxattr handler*/
+	if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
+		sbsec->flags |= SE_SBLABELSUPP;
+
 	/* Initialize the root inode. */
 	rc = inode_doinit_with_dentry(root_inode, root);
 
@@ -1479,14 +1483,14 @@
 			       const struct cred *cred,
 			       int cap, int audit)
 {
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	struct av_decision avd;
 	u16 sclass;
 	u32 sid = cred_sid(cred);
 	u32 av = CAP_TO_MASK(cap);
 	int rc;
 
-	AVC_AUDIT_DATA_INIT(&ad, CAP);
+	COMMON_AUDIT_DATA_INIT(&ad, CAP);
 	ad.tsk = tsk;
 	ad.u.cap = cap;
 
@@ -1525,12 +1529,14 @@
 static int inode_has_perm(const struct cred *cred,
 			  struct inode *inode,
 			  u32 perms,
-			  struct avc_audit_data *adp)
+			  struct common_audit_data *adp)
 {
 	struct inode_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid;
 
+	validate_creds(cred);
+
 	if (unlikely(IS_PRIVATE(inode)))
 		return 0;
 
@@ -1539,7 +1545,7 @@
 
 	if (!adp) {
 		adp = &ad;
-		AVC_AUDIT_DATA_INIT(&ad, FS);
+		COMMON_AUDIT_DATA_INIT(&ad, FS);
 		ad.u.fs.inode = inode;
 	}
 
@@ -1555,9 +1561,9 @@
 				  u32 av)
 {
 	struct inode *inode = dentry->d_inode;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path.mnt = mnt;
 	ad.u.fs.path.dentry = dentry;
 	return inode_has_perm(cred, inode, av, &ad);
@@ -1577,11 +1583,11 @@
 {
 	struct file_security_struct *fsec = file->f_security;
 	struct inode *inode = file->f_path.dentry->d_inode;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = cred_sid(cred);
 	int rc;
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path = file->f_path;
 
 	if (sid != fsec->sid) {
@@ -1612,7 +1618,7 @@
 	struct inode_security_struct *dsec;
 	struct superblock_security_struct *sbsec;
 	u32 sid, newsid;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	int rc;
 
 	dsec = dir->i_security;
@@ -1621,7 +1627,7 @@
 	sid = tsec->sid;
 	newsid = tsec->create_sid;
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path.dentry = dentry;
 
 	rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR,
@@ -1665,7 +1671,7 @@
 
 {
 	struct inode_security_struct *dsec, *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 	u32 av;
 	int rc;
@@ -1673,7 +1679,7 @@
 	dsec = dir->i_security;
 	isec = dentry->d_inode->i_security;
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path.dentry = dentry;
 
 	av = DIR__SEARCH;
@@ -1708,7 +1714,7 @@
 			     struct dentry *new_dentry)
 {
 	struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 	u32 av;
 	int old_is_dir, new_is_dir;
@@ -1719,7 +1725,7 @@
 	old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
 	new_dsec = new_dir->i_security;
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 
 	ad.u.fs.path.dentry = old_dentry;
 	rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR,
@@ -1761,7 +1767,7 @@
 static int superblock_has_perm(const struct cred *cred,
 			       struct super_block *sb,
 			       u32 perms,
-			       struct avc_audit_data *ad)
+			       struct common_audit_data *ad)
 {
 	struct superblock_security_struct *sbsec;
 	u32 sid = cred_sid(cred);
@@ -1855,12 +1861,12 @@
 
 /* Hook functions begin here. */
 
-static int selinux_ptrace_may_access(struct task_struct *child,
+static int selinux_ptrace_access_check(struct task_struct *child,
 				     unsigned int mode)
 {
 	int rc;
 
-	rc = cap_ptrace_may_access(child, mode);
+	rc = cap_ptrace_access_check(child, mode);
 	if (rc)
 		return rc;
 
@@ -2101,7 +2107,7 @@
 	const struct task_security_struct *old_tsec;
 	struct task_security_struct *new_tsec;
 	struct inode_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	struct inode *inode = bprm->file->f_path.dentry->d_inode;
 	int rc;
 
@@ -2139,7 +2145,7 @@
 			return rc;
 	}
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path = bprm->file->f_path;
 
 	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
@@ -2232,7 +2238,7 @@
 static inline void flush_unauthorized_files(const struct cred *cred,
 					    struct files_struct *files)
 {
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	struct file *file, *devnull = NULL;
 	struct tty_struct *tty;
 	struct fdtable *fdt;
@@ -2266,7 +2272,7 @@
 
 	/* Revalidate access to inherited open files. */
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 
 	spin_lock(&files->file_lock);
 	for (;;) {
@@ -2515,7 +2521,7 @@
 static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
 	const struct cred *cred = current_cred();
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	int rc;
 
 	rc = superblock_doinit(sb, data);
@@ -2526,7 +2532,7 @@
 	if (flags & MS_KERNMOUNT)
 		return 0;
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path.dentry = sb->s_root;
 	return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad);
 }
@@ -2534,9 +2540,9 @@
 static int selinux_sb_statfs(struct dentry *dentry)
 {
 	const struct cred *cred = current_cred();
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path.dentry = dentry->d_sb->s_root;
 	return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad);
 }
@@ -2711,12 +2717,18 @@
 static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	const struct cred *cred = current_cred();
+	unsigned int ia_valid = iattr->ia_valid;
 
-	if (iattr->ia_valid & ATTR_FORCE)
-		return 0;
+	/* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */
+	if (ia_valid & ATTR_FORCE) {
+		ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE |
+			      ATTR_FORCE);
+		if (!ia_valid)
+			return 0;
+	}
 
-	if (iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
-			       ATTR_ATIME_SET | ATTR_MTIME_SET))
+	if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
+			ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
 		return dentry_has_perm(cred, NULL, dentry, FILE__SETATTR);
 
 	return dentry_has_perm(cred, NULL, dentry, FILE__WRITE);
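
The masking is easy to sanity-check in isolation; the flag values below are
mirrored from include/linux/fs.h for illustration only:

	#include <assert.h>

	#define ATTR_MODE	(1 << 0)
	#define ATTR_FORCE	(1 << 9)	/* forced change: skip permission check */
	#define ATTR_KILL_SUID	(1 << 11)
	#define ATTR_KILL_SGID	(1 << 12)

	int main(void)
	{
		/* what the VFS passes down when stripping setuid on write */
		unsigned int ia_valid = ATTR_FORCE | ATTR_KILL_SUID | ATTR_MODE;

		if (ia_valid & ATTR_FORCE) {
			ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
				      ATTR_MODE | ATTR_FORCE);
			/* nothing left: no SETATTR check, no spurious denial */
			assert(ia_valid == 0);
		}
		return 0;
	}
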
@@ -2756,7 +2768,7 @@
 	struct inode *inode = dentry->d_inode;
 	struct inode_security_struct *isec = inode->i_security;
 	struct superblock_security_struct *sbsec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 newsid, sid = current_sid();
 	int rc = 0;
 
@@ -2770,7 +2782,7 @@
 	if (!is_owner_or_cap(inode))
 		return -EPERM;
 
-	AVC_AUDIT_DATA_INIT(&ad, FS);
+	COMMON_AUDIT_DATA_INIT(&ad, FS);
 	ad.u.fs.path.dentry = dentry;
 
 	rc = avc_has_perm(sid, isec->sid, isec->sclass,
@@ -2915,6 +2927,7 @@
 		return rc;
 
 	isec->sid = newsid;
+	isec->initialized = 1;
 	return 0;
 }
 
@@ -2939,11 +2952,6 @@
 	const struct cred *cred = current_cred();
 	struct inode *inode = file->f_path.dentry->d_inode;
 
-	if (!mask) {
-		/* No permission to check.  Existence test. */
-		return 0;
-	}
-
 	/* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */
 	if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
 		mask |= MAY_APPEND;
@@ -2954,10 +2962,20 @@
 
 static int selinux_file_permission(struct file *file, int mask)
 {
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct file_security_struct *fsec = file->f_security;
+	struct inode_security_struct *isec = inode->i_security;
+	u32 sid = current_sid();
+
 	if (!mask)
 		/* No permission to check.  Existence test. */
 		return 0;
 
+	if (sid == fsec->sid && fsec->isid == isec->sid &&
+	    fsec->pseqno == avc_policy_seqno())
+		/* No change since dentry_open check. */
+		return 0;
+
 	return selinux_revalidate_file_permission(file, mask);
 }
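
The fast path added above pairs with selinux_dentry_open(), which snapshots
the inode label and the policy sequence number at open time (abridged sketch
of that existing hook):

	static int selinux_dentry_open(struct file *file, const struct cred *cred)
	{
		struct file_security_struct *fsec = file->f_security;
		struct inode *inode = file->f_path.dentry->d_inode;
		struct inode_security_struct *isec = inode->i_security;

		/* remembered so selinux_file_permission() can tell whether
		 * revalidation is needed */
		fsec->isid = isec->sid;
		fsec->pseqno = avc_policy_seqno();

		return inode_has_perm(cred, inode, open_file_to_av(file), NULL);
	}
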
 
@@ -3220,12 +3238,29 @@
 }
 
 /*
+ * allocate the SELinux part of blank credentials
+ */
+static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+	struct task_security_struct *tsec;
+
+	tsec = kzalloc(sizeof(struct task_security_struct), gfp);
+	if (!tsec)
+		return -ENOMEM;
+
+	cred->security = tsec;
+	return 0;
+}
+
+/*
  * detach and free the LSM part of a set of credentials
  */
 static void selinux_cred_free(struct cred *cred)
 {
 	struct task_security_struct *tsec = cred->security;
-	cred->security = NULL;
+
+	BUG_ON((unsigned long) cred->security < PAGE_SIZE);
+	cred->security = (void *) 0x7UL;
 	kfree(tsec);
 }
 
@@ -3249,6 +3284,17 @@
 }
 
 /*
+ * transfer the SELinux data to a blank set of creds
+ */
+static void selinux_cred_transfer(struct cred *new, const struct cred *old)
+{
+	const struct task_security_struct *old_tsec = old->security;
+	struct task_security_struct *tsec = new->security;
+
+	*tsec = *old_tsec;
+}
+
+/*
  * set the security data for a kernel service
  * - all the creation contexts are set to unlabelled
  */
@@ -3292,6 +3338,11 @@
 	return 0;
 }
 
+static int selinux_kernel_module_request(void)
+{
+	return task_has_system(current, SYSTEM__MODULE_REQUEST);
+}
+
 static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
 {
 	return current_has_perm(p, PROCESS__SETPGID);
@@ -3409,7 +3460,7 @@
 
 /* Returns error only if unable to parse addresses */
 static int selinux_parse_skb_ipv4(struct sk_buff *skb,
-			struct avc_audit_data *ad, u8 *proto)
+			struct common_audit_data *ad, u8 *proto)
 {
 	int offset, ihlen, ret = -EINVAL;
 	struct iphdr _iph, *ih;
@@ -3490,7 +3541,7 @@
 
 /* Returns error only if unable to parse addresses */
 static int selinux_parse_skb_ipv6(struct sk_buff *skb,
-			struct avc_audit_data *ad, u8 *proto)
+			struct common_audit_data *ad, u8 *proto)
 {
 	u8 nexthdr;
 	int ret = -EINVAL, offset;
@@ -3561,7 +3612,7 @@
 
 #endif /* IPV6 */
 
-static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad,
+static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad,
 			     char **_addrp, int src, u8 *proto)
 {
 	char *addrp;
@@ -3643,7 +3694,7 @@
 			   u32 perms)
 {
 	struct inode_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid;
 	int err = 0;
 
@@ -3653,7 +3704,7 @@
 		goto out;
 	sid = task_sid(task);
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
+	COMMON_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.sk = sock->sk;
 	err = avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
 
@@ -3740,7 +3791,7 @@
 	if (family == PF_INET || family == PF_INET6) {
 		char *addrp;
 		struct inode_security_struct *isec;
-		struct avc_audit_data ad;
+		struct common_audit_data ad;
 		struct sockaddr_in *addr4 = NULL;
 		struct sockaddr_in6 *addr6 = NULL;
 		unsigned short snum;
@@ -3769,7 +3820,7 @@
 						      snum, &sid);
 				if (err)
 					goto out;
-				AVC_AUDIT_DATA_INIT(&ad, NET);
+				COMMON_AUDIT_DATA_INIT(&ad, NET);
 				ad.u.net.sport = htons(snum);
 				ad.u.net.family = family;
 				err = avc_has_perm(isec->sid, sid,
@@ -3802,7 +3853,7 @@
 		if (err)
 			goto out;
 
-		AVC_AUDIT_DATA_INIT(&ad, NET);
+		COMMON_AUDIT_DATA_INIT(&ad, NET);
 		ad.u.net.sport = htons(snum);
 		ad.u.net.family = family;
 
@@ -3836,7 +3887,7 @@
 	isec = SOCK_INODE(sock)->i_security;
 	if (isec->sclass == SECCLASS_TCP_SOCKET ||
 	    isec->sclass == SECCLASS_DCCP_SOCKET) {
-		struct avc_audit_data ad;
+		struct common_audit_data ad;
 		struct sockaddr_in *addr4 = NULL;
 		struct sockaddr_in6 *addr6 = NULL;
 		unsigned short snum;
@@ -3861,7 +3912,7 @@
 		perm = (isec->sclass == SECCLASS_TCP_SOCKET) ?
 		       TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT;
 
-		AVC_AUDIT_DATA_INIT(&ad, NET);
+		COMMON_AUDIT_DATA_INIT(&ad, NET);
 		ad.u.net.dport = htons(snum);
 		ad.u.net.family = sk->sk_family;
 		err = avc_has_perm(isec->sid, sid, isec->sclass, perm, &ad);
@@ -3951,13 +4002,13 @@
 	struct sk_security_struct *ssec;
 	struct inode_security_struct *isec;
 	struct inode_security_struct *other_isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	int err;
 
 	isec = SOCK_INODE(sock)->i_security;
 	other_isec = SOCK_INODE(other)->i_security;
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
+	COMMON_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.sk = other->sk;
 
 	err = avc_has_perm(isec->sid, other_isec->sid,
@@ -3983,13 +4034,13 @@
 {
 	struct inode_security_struct *isec;
 	struct inode_security_struct *other_isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	int err;
 
 	isec = SOCK_INODE(sock)->i_security;
 	other_isec = SOCK_INODE(other)->i_security;
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
+	COMMON_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.sk = other->sk;
 
 	err = avc_has_perm(isec->sid, other_isec->sid,
@@ -4002,7 +4053,7 @@
 
 static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family,
 				    u32 peer_sid,
-				    struct avc_audit_data *ad)
+				    struct common_audit_data *ad)
 {
 	int err;
 	u32 if_sid;
@@ -4030,10 +4081,10 @@
 	struct sk_security_struct *sksec = sk->sk_security;
 	u32 peer_sid;
 	u32 sk_sid = sksec->sid;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	char *addrp;
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
+	COMMON_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = skb->iif;
 	ad.u.net.family = family;
 	err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
@@ -4071,7 +4122,7 @@
 	struct sk_security_struct *sksec = sk->sk_security;
 	u16 family = sk->sk_family;
 	u32 sk_sid = sksec->sid;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	char *addrp;
 	u8 secmark_active;
 	u8 peerlbl_active;
@@ -4095,7 +4146,7 @@
 	if (!secmark_active && !peerlbl_active)
 		return 0;
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
+	COMMON_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = skb->iif;
 	ad.u.net.family = family;
 	err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
@@ -4309,6 +4360,59 @@
 	fl->secid = req->secid;
 }
 
+static int selinux_tun_dev_create(void)
+{
+	u32 sid = current_sid();
+
+	/* we aren't taking into account the "sockcreate" SID since the socket
+	 * that is being created here is not a socket in the traditional sense,
+	 * instead it is a private sock, accessible only to the kernel, and
+	 * representing a wide range of network traffic spanning multiple
+	 * connections unlike traditional sockets - check the TUN driver to
+	 * get a better understanding of why this socket is special */
+
+	return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE,
+			    NULL);
+}
+
+static void selinux_tun_dev_post_create(struct sock *sk)
+{
+	struct sk_security_struct *sksec = sk->sk_security;
+
+	/* we don't currently perform any NetLabel based labeling here and it
+	 * isn't clear that we would want to do so anyway; while we could apply
+	 * labeling without the support of the TUN user the resulting labeled
+	 * traffic from the other end of the connection would almost certainly
+	 * cause confusion to the TUN user that had no idea network labeling
+	 * protocols were being used */
+
+	/* see the comments in selinux_tun_dev_create() about why we don't use
+	 * the sockcreate SID here */
+
+	sksec->sid = current_sid();
+	sksec->sclass = SECCLASS_TUN_SOCKET;
+}
+
+static int selinux_tun_dev_attach(struct sock *sk)
+{
+	struct sk_security_struct *sksec = sk->sk_security;
+	u32 sid = current_sid();
+	int err;
+
+	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
+			   TUN_SOCKET__RELABELFROM, NULL);
+	if (err)
+		return err;
+	err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET,
+			   TUN_SOCKET__RELABELTO, NULL);
+	if (err)
+		return err;
+
+	sksec->sid = sid;
+
+	return 0;
+}
+
 static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
 {
 	int err = 0;
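
The consumers of these three hooks live in drivers/net/tun.c, which is changed
elsewhere in this series; the attach path looks roughly like this (abridged
sketch, not the verbatim driver code):

	static int tun_attach(struct tun_struct *tun, struct file *file)
	{
		int err;

		/* relabelfrom/relabelto are decided before the queue is
		 * wired up, so a denial leaves the device untouched */
		err = security_tun_dev_attach(tun->socket.sk);
		if (err < 0)
			return err;

		/* ... existing attach logic ... */
		return 0;
	}
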
@@ -4353,7 +4457,7 @@
 	int err;
 	char *addrp;
 	u32 peer_sid;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u8 secmark_active;
 	u8 netlbl_active;
 	u8 peerlbl_active;
@@ -4370,7 +4474,7 @@
 	if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0)
 		return NF_DROP;
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
+	COMMON_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = ifindex;
 	ad.u.net.family = family;
 	if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
@@ -4458,7 +4562,7 @@
 {
 	struct sock *sk = skb->sk;
 	struct sk_security_struct *sksec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	char *addrp;
 	u8 proto;
 
@@ -4466,7 +4570,7 @@
 		return NF_ACCEPT;
 	sksec = sk->sk_security;
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
+	COMMON_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = ifindex;
 	ad.u.net.family = family;
 	if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto))
@@ -4490,7 +4594,7 @@
 	u32 secmark_perm;
 	u32 peer_sid;
 	struct sock *sk;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	char *addrp;
 	u8 secmark_active;
 	u8 peerlbl_active;
@@ -4549,7 +4653,7 @@
 		secmark_perm = PACKET__SEND;
 	}
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
+	COMMON_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = ifindex;
 	ad.u.net.family = family;
 	if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL))
@@ -4619,13 +4723,13 @@
 static int selinux_netlink_recv(struct sk_buff *skb, int capability)
 {
 	int err;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 
 	err = cap_netlink_recv(skb, capability);
 	if (err)
 		return err;
 
-	AVC_AUDIT_DATA_INIT(&ad, CAP);
+	COMMON_AUDIT_DATA_INIT(&ad, CAP);
 	ad.u.cap = capability;
 
 	return avc_has_perm(NETLINK_CB(skb).sid, NETLINK_CB(skb).sid,
@@ -4684,12 +4788,12 @@
 			u32 perms)
 {
 	struct ipc_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 
 	isec = ipc_perms->security;
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = ipc_perms->key;
 
 	return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
@@ -4709,7 +4813,7 @@
 static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
 {
 	struct ipc_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 	int rc;
 
@@ -4719,7 +4823,7 @@
 
 	isec = msq->q_perm.security;
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = msq->q_perm.key;
 
 	rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
@@ -4739,12 +4843,12 @@
 static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg)
 {
 	struct ipc_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 
 	isec = msq->q_perm.security;
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = msq->q_perm.key;
 
 	return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
@@ -4783,7 +4887,7 @@
 {
 	struct ipc_security_struct *isec;
 	struct msg_security_struct *msec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 	int rc;
 
@@ -4804,7 +4908,7 @@
 			return rc;
 	}
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = msq->q_perm.key;
 
 	/* Can this process write to the queue? */
@@ -4828,14 +4932,14 @@
 {
 	struct ipc_security_struct *isec;
 	struct msg_security_struct *msec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = task_sid(target);
 	int rc;
 
 	isec = msq->q_perm.security;
 	msec = msg->security;
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = msq->q_perm.key;
 
 	rc = avc_has_perm(sid, isec->sid,
@@ -4850,7 +4954,7 @@
 static int selinux_shm_alloc_security(struct shmid_kernel *shp)
 {
 	struct ipc_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 	int rc;
 
@@ -4860,7 +4964,7 @@
 
 	isec = shp->shm_perm.security;
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = shp->shm_perm.key;
 
 	rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM,
@@ -4880,12 +4984,12 @@
 static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg)
 {
 	struct ipc_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 
 	isec = shp->shm_perm.security;
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = shp->shm_perm.key;
 
 	return avc_has_perm(sid, isec->sid, SECCLASS_SHM,
@@ -4942,7 +5046,7 @@
 static int selinux_sem_alloc_security(struct sem_array *sma)
 {
 	struct ipc_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 	int rc;
 
@@ -4952,7 +5056,7 @@
 
 	isec = sma->sem_perm.security;
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = sma->sem_perm.key;
 
 	rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM,
@@ -4972,12 +5076,12 @@
 static int selinux_sem_associate(struct sem_array *sma, int semflg)
 {
 	struct ipc_security_struct *isec;
-	struct avc_audit_data ad;
+	struct common_audit_data ad;
 	u32 sid = current_sid();
 
 	isec = sma->sem_perm.security;
 
-	AVC_AUDIT_DATA_INIT(&ad, IPC);
+	COMMON_AUDIT_DATA_INIT(&ad, IPC);
 	ad.u.ipc_id = sma->sem_perm.key;
 
 	return avc_has_perm(sid, isec->sid, SECCLASS_SEM,
@@ -5195,7 +5299,7 @@
 
 		/* Only allow single threaded processes to change context */
 		error = -EPERM;
-		if (!is_single_threaded(p)) {
+		if (!current_is_single_threaded()) {
 			error = security_bounded_transition(tsec->sid, sid);
 			if (error)
 				goto abort_change;
@@ -5252,6 +5356,32 @@
 	kfree(secdata);
 }
 
+/*
+ *	called with inode->i_mutex locked
+ */
+static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+	return selinux_inode_setsecurity(inode, XATTR_SELINUX_SUFFIX, ctx, ctxlen, 0);
+}
+
+/*
+ *	called with inode->i_mutex locked
+ */
+static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+	return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0);
+}
+
+static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+	int len = 0;
+	len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX,
+						ctx, true);
+	if (len < 0)
+		return len;
+	*ctxlen = len;
+	return 0;
+}
 #ifdef CONFIG_KEYS
 
 static int selinux_key_alloc(struct key *k, const struct cred *cred,
@@ -5323,7 +5453,7 @@
 static struct security_operations selinux_ops = {
 	.name =				"selinux",
 
-	.ptrace_may_access =		selinux_ptrace_may_access,
+	.ptrace_access_check =		selinux_ptrace_access_check,
 	.ptrace_traceme =		selinux_ptrace_traceme,
 	.capget =			selinux_capget,
 	.capset =			selinux_capset,
@@ -5396,10 +5526,13 @@
 	.dentry_open =			selinux_dentry_open,
 
 	.task_create =			selinux_task_create,
+	.cred_alloc_blank =		selinux_cred_alloc_blank,
 	.cred_free =			selinux_cred_free,
 	.cred_prepare =			selinux_cred_prepare,
+	.cred_transfer =		selinux_cred_transfer,
 	.kernel_act_as =		selinux_kernel_act_as,
 	.kernel_create_files_as =	selinux_kernel_create_files_as,
+	.kernel_module_request =	selinux_kernel_module_request,
 	.task_setpgid =			selinux_task_setpgid,
 	.task_getpgid =			selinux_task_getpgid,
 	.task_getsid =			selinux_task_getsid,
@@ -5448,6 +5581,9 @@
 	.secid_to_secctx =		selinux_secid_to_secctx,
 	.secctx_to_secid =		selinux_secctx_to_secid,
 	.release_secctx =		selinux_release_secctx,
+	.inode_notifysecctx =		selinux_inode_notifysecctx,
+	.inode_setsecctx =		selinux_inode_setsecctx,
+	.inode_getsecctx =		selinux_inode_getsecctx,
 
 	.unix_stream_connect =		selinux_socket_unix_stream_connect,
 	.unix_may_send =		selinux_socket_unix_may_send,
@@ -5477,6 +5613,9 @@
 	.inet_csk_clone =		selinux_inet_csk_clone,
 	.inet_conn_established =	selinux_inet_conn_established,
 	.req_classify_flow =		selinux_req_classify_flow,
+	.tun_dev_create =		selinux_tun_dev_create,
+	.tun_dev_post_create = 		selinux_tun_dev_post_create,
+	.tun_dev_attach =		selinux_tun_dev_attach,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
@@ -5691,6 +5830,9 @@
 	selinux_disabled = 1;
 	selinux_enabled = 0;
 
+	/* Try to destroy the avc node cache */
+	avc_disable();
+
 	/* Reset security_ops to the secondary module, dummy or capability. */
 	security_ops = secondary_ops;
 
diff --git a/security/selinux/include/av_inherit.h b/security/selinux/include/av_inherit.h
index 8377a4b..abedcd7 100644
--- a/security/selinux/include/av_inherit.h
+++ b/security/selinux/include/av_inherit.h
@@ -15,6 +15,7 @@
    S_(SECCLASS_KEY_SOCKET, socket, 0x00400000UL)
    S_(SECCLASS_UNIX_STREAM_SOCKET, socket, 0x00400000UL)
    S_(SECCLASS_UNIX_DGRAM_SOCKET, socket, 0x00400000UL)
+   S_(SECCLASS_TUN_SOCKET, socket, 0x00400000UL)
    S_(SECCLASS_IPC, ipc, 0x00000200UL)
    S_(SECCLASS_SEM, ipc, 0x00000200UL)
    S_(SECCLASS_MSGQ, ipc, 0x00000200UL)
diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h
index 31df1d7..2b683ad 100644
--- a/security/selinux/include/av_perm_to_string.h
+++ b/security/selinux/include/av_perm_to_string.h
@@ -107,6 +107,7 @@
    S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, "syslog_read")
    S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, "syslog_mod")
    S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, "syslog_console")
+   S_(SECCLASS_SYSTEM, SYSTEM__MODULE_REQUEST, "module_request")
    S_(SECCLASS_CAPABILITY, CAPABILITY__CHOWN, "chown")
    S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_OVERRIDE, "dac_override")
    S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_READ_SEARCH, "dac_read_search")
diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h
index d645192..0546d61 100644
--- a/security/selinux/include/av_permissions.h
+++ b/security/selinux/include/av_permissions.h
@@ -423,6 +423,28 @@
 #define UNIX_DGRAM_SOCKET__RECV_MSG               0x00080000UL
 #define UNIX_DGRAM_SOCKET__SEND_MSG               0x00100000UL
 #define UNIX_DGRAM_SOCKET__NAME_BIND              0x00200000UL
+#define TUN_SOCKET__IOCTL                         0x00000001UL
+#define TUN_SOCKET__READ                          0x00000002UL
+#define TUN_SOCKET__WRITE                         0x00000004UL
+#define TUN_SOCKET__CREATE                        0x00000008UL
+#define TUN_SOCKET__GETATTR                       0x00000010UL
+#define TUN_SOCKET__SETATTR                       0x00000020UL
+#define TUN_SOCKET__LOCK                          0x00000040UL
+#define TUN_SOCKET__RELABELFROM                   0x00000080UL
+#define TUN_SOCKET__RELABELTO                     0x00000100UL
+#define TUN_SOCKET__APPEND                        0x00000200UL
+#define TUN_SOCKET__BIND                          0x00000400UL
+#define TUN_SOCKET__CONNECT                       0x00000800UL
+#define TUN_SOCKET__LISTEN                        0x00001000UL
+#define TUN_SOCKET__ACCEPT                        0x00002000UL
+#define TUN_SOCKET__GETOPT                        0x00004000UL
+#define TUN_SOCKET__SETOPT                        0x00008000UL
+#define TUN_SOCKET__SHUTDOWN                      0x00010000UL
+#define TUN_SOCKET__RECVFROM                      0x00020000UL
+#define TUN_SOCKET__SENDTO                        0x00040000UL
+#define TUN_SOCKET__RECV_MSG                      0x00080000UL
+#define TUN_SOCKET__SEND_MSG                      0x00100000UL
+#define TUN_SOCKET__NAME_BIND                     0x00200000UL
 #define PROCESS__FORK                             0x00000001UL
 #define PROCESS__TRANSITION                       0x00000002UL
 #define PROCESS__SIGCHLD                          0x00000004UL
@@ -508,6 +530,7 @@
 #define SYSTEM__SYSLOG_READ                       0x00000002UL
 #define SYSTEM__SYSLOG_MOD                        0x00000004UL
 #define SYSTEM__SYSLOG_CONSOLE                    0x00000008UL
+#define SYSTEM__MODULE_REQUEST                    0x00000010UL
 #define CAPABILITY__CHOWN                         0x00000001UL
 #define CAPABILITY__DAC_OVERRIDE                  0x00000002UL
 #define CAPABILITY__DAC_READ_SEARCH               0x00000004UL
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index d12ff1a..e94e82f 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/init.h>
 #include <linux/audit.h>
+#include <linux/lsm_audit.h>
 #include <linux/in6.h>
 #include <linux/path.h>
 #include <asm/system.h>
@@ -36,48 +37,6 @@
 struct sock;
 struct sk_buff;
 
-/* Auxiliary data to use in generating the audit record. */
-struct avc_audit_data {
-	char    type;
-#define AVC_AUDIT_DATA_FS   1
-#define AVC_AUDIT_DATA_NET  2
-#define AVC_AUDIT_DATA_CAP  3
-#define AVC_AUDIT_DATA_IPC  4
-	struct task_struct *tsk;
-	union 	{
-		struct {
-			struct path path;
-			struct inode *inode;
-		} fs;
-		struct {
-			int netif;
-			struct sock *sk;
-			u16 family;
-			__be16 dport;
-			__be16 sport;
-			union {
-				struct {
-					__be32 daddr;
-					__be32 saddr;
-				} v4;
-				struct {
-					struct in6_addr daddr;
-					struct in6_addr saddr;
-				} v6;
-			} fam;
-		} net;
-		int cap;
-		int ipc_id;
-	} u;
-};
-
-#define v4info fam.v4
-#define v6info fam.v6
-
-/* Initialize an AVC audit data structure. */
-#define AVC_AUDIT_DATA_INIT(_d,_t) \
-	{ memset((_d), 0, sizeof(struct avc_audit_data)); (_d)->type = AVC_AUDIT_DATA_##_t; }
-
 /*
  * AVC statistics
  */
@@ -98,7 +57,9 @@
 
 void avc_audit(u32 ssid, u32 tsid,
 	       u16 tclass, u32 requested,
-	       struct av_decision *avd, int result, struct avc_audit_data *auditdata);
+	       struct av_decision *avd,
+	       int result,
+	       struct common_audit_data *a);
 
 #define AVC_STRICT 1 /* Ignore permissive mode. */
 int avc_has_perm_noaudit(u32 ssid, u32 tsid,
@@ -108,7 +69,7 @@
 
 int avc_has_perm(u32 ssid, u32 tsid,
 		 u16 tclass, u32 requested,
-		 struct avc_audit_data *auditdata);
+		 struct common_audit_data *auditdata);
 
 u32 avc_policy_seqno(void);
 
@@ -127,13 +88,13 @@
 		     u32 events, u32 ssid, u32 tsid,
 		     u16 tclass, u32 perms);
 
-/* Shows permission in human readable form */
-void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av);
-
 /* Exported to selinuxfs */
 int avc_get_hash_stats(char *page);
 extern unsigned int avc_cache_threshold;
 
+/* Attempt to free avc node cache */
+void avc_disable(void);
+
 #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
 DECLARE_PER_CPU(struct avc_cache_stats, avc_cache_stats);
 #endif
diff --git a/security/selinux/include/class_to_string.h b/security/selinux/include/class_to_string.h
index 21ec786..7ab9299 100644
--- a/security/selinux/include/class_to_string.h
+++ b/security/selinux/include/class_to_string.h
@@ -77,3 +77,4 @@
     S_(NULL)
     S_(NULL)
     S_("kernel_service")
+    S_("tun_socket")
diff --git a/security/selinux/include/flask.h b/security/selinux/include/flask.h
index 882f27d..f248500 100644
--- a/security/selinux/include/flask.h
+++ b/security/selinux/include/flask.h
@@ -53,6 +53,7 @@
 #define SECCLASS_PEER                                    68
 #define SECCLASS_CAPABILITY2                             69
 #define SECCLASS_KERNEL_SERVICE                          74
+#define SECCLASS_TUN_SOCKET                              75
 
 /*
  * Security identifier indices for initial entities
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index b4b5b9b..8d73842 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -59,7 +59,7 @@
 int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 				struct sk_buff *skb,
 				u16 family,
-				struct avc_audit_data *ad);
+				struct common_audit_data *ad);
 int selinux_netlbl_socket_setsockopt(struct socket *sock,
 				     int level,
 				     int optname);
@@ -129,7 +129,7 @@
 static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 					      struct sk_buff *skb,
 					      u16 family,
-					      struct avc_audit_data *ad)
+					      struct common_audit_data *ad)
 {
 	return 0;
 }
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 289e24b..13128f9 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -41,9 +41,9 @@
 }
 
 int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
-			struct avc_audit_data *ad);
+			struct common_audit_data *ad);
 int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-			struct avc_audit_data *ad, u8 proto);
+			struct common_audit_data *ad, u8 proto);
 int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
 
 static inline void selinux_xfrm_notify_policyload(void)
@@ -57,13 +57,13 @@
 }
 
 static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-			struct avc_audit_data *ad)
+			struct common_audit_data *ad)
 {
 	return 0;
 }
 
 static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-			struct avc_audit_data *ad, u8 proto)
+			struct common_audit_data *ad, u8 proto)
 {
 	return 0;
 }
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 2e98441..e688237 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -342,7 +342,7 @@
 int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 				struct sk_buff *skb,
 				u16 family,
-				struct avc_audit_data *ad)
+				struct common_audit_data *ad)
 {
 	int rc;
 	u32 nlbl_sid;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 500e6f7..ff17820 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -22,6 +22,11 @@
  *
  *  Added validation of kernel classes and permissions
  *
+ * Updated: KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *
+ *  Added support for bounds domain and audit messages on masked permissions
+ *
+ * Copyright (C) 2008, 2009 NEC Corporation
  * Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P.
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  * Copyright (C) 2003 - 2004, 2006 Tresys Technology, LLC
@@ -279,6 +284,95 @@
 }
 
 /*
+ * security_dump_masked_av - dumps masked permissions during
+ * security_compute_av due to RBAC, MLS/Constraint and Type bounds.
+ */
+static int dump_masked_av_helper(void *k, void *d, void *args)
+{
+	struct perm_datum *pdatum = d;
+	char **permission_names = args;
+
+	BUG_ON(pdatum->value < 1 || pdatum->value > 32);
+
+	permission_names[pdatum->value - 1] = (char *)k;
+
+	return 0;
+}
+
+static void security_dump_masked_av(struct context *scontext,
+				    struct context *tcontext,
+				    u16 tclass,
+				    u32 permissions,
+				    const char *reason)
+{
+	struct common_datum *common_dat;
+	struct class_datum *tclass_dat;
+	struct audit_buffer *ab;
+	char *tclass_name;
+	char *scontext_name = NULL;
+	char *tcontext_name = NULL;
+	char *permission_names[32] = {};
+	int index, length;
+	bool need_comma = false;
+
+	if (!permissions)
+		return;
+
+	tclass_name = policydb.p_class_val_to_name[tclass - 1];
+	tclass_dat = policydb.class_val_to_struct[tclass - 1];
+	common_dat = tclass_dat->comdatum;
+
+	/* init permission_names */
+	if (common_dat &&
+	    hashtab_map(common_dat->permissions.table,
+			dump_masked_av_helper, permission_names) < 0)
+		goto out;
+
+	if (hashtab_map(tclass_dat->permissions.table,
+			dump_masked_av_helper, permission_names) < 0)
+		goto out;
+
+	/* get scontext/tcontext in text form */
+	if (context_struct_to_string(scontext,
+				     &scontext_name, &length) < 0)
+		goto out;
+
+	if (context_struct_to_string(tcontext,
+				     &tcontext_name, &length) < 0)
+		goto out;
+
+	/* audit a message */
+	ab = audit_log_start(current->audit_context,
+			     GFP_ATOMIC, AUDIT_SELINUX_ERR);
+	if (!ab)
+		goto out;
+
+	audit_log_format(ab, "op=security_compute_av reason=%s "
+			 "scontext=%s tcontext=%s tclass=%s perms=",
+			 reason, scontext_name, tcontext_name, tclass_name);
+
+	for (index = 0; index < 32; index++) {
+		u32 mask = (1 << index);
+
+		if ((mask & permissions) == 0)
+			continue;
+
+		audit_log_format(ab, "%s%s",
+				 need_comma ? "," : "",
+				 permission_names[index]
+				 ? permission_names[index] : "????");
+		need_comma = true;
+	}
+	audit_log_end(ab);
+out:
+	/* release scontext/tcontext */
+	kfree(tcontext_name);
+	kfree(scontext_name);
+
+	return;
+}
+
+/*
  * security_boundary_permission - drops violated permissions
  * on boundary constraint.
  */
@@ -347,28 +441,12 @@
 	}
 
 	if (masked) {
-		struct audit_buffer *ab;
-		char *stype_name
-			= policydb.p_type_val_to_name[source->value - 1];
-		char *ttype_name
-			= policydb.p_type_val_to_name[target->value - 1];
-		char *tclass_name
-			= policydb.p_class_val_to_name[tclass - 1];
-
 		/* mask violated permissions */
 		avd->allowed &= ~masked;
 
-		/* notice to userspace via audit message */
-		ab = audit_log_start(current->audit_context,
-				     GFP_ATOMIC, AUDIT_SELINUX_ERR);
-		if (!ab)
-			return;
-
-		audit_log_format(ab, "av boundary violation: "
-				 "source=%s target=%s tclass=%s",
-				 stype_name, ttype_name, tclass_name);
-		avc_dump_av(ab, tclass, masked);
-		audit_log_end(ab);
+		/* audit masked permissions */
+		security_dump_masked_av(scontext, tcontext,
+					tclass, masked, "bounds");
 	}
 }
 
@@ -480,7 +558,7 @@
 		if ((constraint->permissions & (avd->allowed)) &&
 		    !constraint_expr_eval(scontext, tcontext, NULL,
 					  constraint->expr)) {
-			avd->allowed = (avd->allowed) & ~(constraint->permissions);
+			avd->allowed &= ~(constraint->permissions);
 		}
 		constraint = constraint->next;
 	}
@@ -499,8 +577,8 @@
 				break;
 		}
 		if (!ra)
-			avd->allowed = (avd->allowed) & ~(PROCESS__TRANSITION |
-							PROCESS__DYNTRANSITION);
+			avd->allowed &= ~(PROCESS__TRANSITION |
+					  PROCESS__DYNTRANSITION);
 	}
 
 	/*
@@ -687,6 +765,26 @@
 		}
 		index = type->bounds;
 	}
+
+	if (rc) {
+		char *old_name = NULL;
+		char *new_name = NULL;
+		int length;
+
+		if (!context_struct_to_string(old_context,
+					      &old_name, &length) &&
+		    !context_struct_to_string(new_context,
+					      &new_name, &length)) {
+			audit_log(current->audit_context,
+				  GFP_ATOMIC, AUDIT_SELINUX_ERR,
+				  "op=security_bounded_transition "
+				  "result=denied "
+				  "oldcontext=%s newcontext=%s",
+				  old_name, new_name);
+		}
+		kfree(new_name);
+		kfree(old_name);
+	}
 out:
 	read_unlock(&policy_rwlock);
 
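The two audit records added above come out roughly as follows; the field
layout is taken from the audit_log_format() calls, while the contexts,
class and permission names are invented for illustration:

	op=security_compute_av reason=bounds scontext=system_u:system_r:httpd_t:s0 tcontext=system_u:object_r:shadow_t:s0 tclass=file perms=read,write

	op=security_bounded_transition result=denied oldcontext=system_u:system_r:init_t:s0 newcontext=system_u:system_r:unconfined_t:s0
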
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index 72b1845..f3cb9ed 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -401,7 +401,7 @@
 * gone through the IPsec process.
  */
 int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-				struct avc_audit_data *ad)
+				struct common_audit_data *ad)
 {
 	int i, rc = 0;
 	struct sec_path *sp;
@@ -442,7 +442,7 @@
  * checked in the selinux_xfrm_state_pol_flow_match hook above.
  */
 int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-					struct avc_audit_data *ad, u8 proto)
+					struct common_audit_data *ad, u8 proto)
 {
 	struct dst_entry *dst;
 	int rc = 0;
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 243bec1..c6e9aca 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -275,7 +275,7 @@
 {
 	memset(a, 0, sizeof(*a));
 	a->a.type = type;
-	a->a.function = func;
+	a->a.smack_audit_data.function = func;
 }
 
 static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 513dc1a..0f9ac81 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -240,8 +240,9 @@
 static void smack_log_callback(struct audit_buffer *ab, void *a)
 {
 	struct common_audit_data *ad = a;
-	struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data;
-	audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function,
+	struct smack_audit_data *sad = &ad->smack_audit_data;
+	audit_log_format(ab, "lsm=SMACK fn=%s action=%s", sad->function,
 			 sad->result ? "denied" : "granted");
 	audit_log_format(ab, " subject=");
 	audit_log_untrustedstring(ab, sad->subject);
@@ -274,11 +275,11 @@
 	if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0)
 		return;
 
-	if (a->function == NULL)
-		a->function = "unknown";
+	if (a->smack_audit_data.function == NULL)
+		a->smack_audit_data.function = "unknown";
 
 	/* end preparing the audit data */
-	sad = &a->lsm_priv.smack_audit_data;
+	sad = &a->smack_audit_data;
 	smack_str_from_perm(request_buffer, request);
 	sad->subject = subject_label;
 	sad->object  = object_label;
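
With the union flattened, smack_log_callback() emits records of the form
below; the function name and label are invented, and only the fields
visible in this excerpt are shown:

	lsm=SMACK fn=smack_inode_permission action=denied subject=Rubble ...
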
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0023182..acae7ef4 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -91,7 +91,7 @@
  */
 
 /**
- * smack_ptrace_may_access - Smack approval on PTRACE_ATTACH
+ * smack_ptrace_access_check - Smack approval on PTRACE_ATTACH
  * @ctp: child task pointer
  * @mode: ptrace attachment mode
  *
@@ -99,13 +99,13 @@
  *
  * Do the capability checks, and require read and write.
  */
-static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
+static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
 {
 	int rc;
 	struct smk_audit_info ad;
 	char *sp, *tsp;
 
-	rc = cap_ptrace_may_access(ctp, mode);
+	rc = cap_ptrace_access_check(ctp, mode);
 	if (rc != 0)
 		return rc;
 
@@ -1080,6 +1080,22 @@
  */
 
 /**
+ * smack_cred_alloc_blank - "allocate" blank task-level security credentials
+ * @cred: the new credentials
+ * @gfp: the atomicity of any memory allocations
+ *
+ * Prepare a blank set of credentials for modification.  This must allocate all
+ * the memory the LSM module might require such that cred_transfer() can
+ * complete without error.
+ */
+static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+	cred->security = NULL;
+	return 0;
+}
+
+/**
  * smack_cred_free - "free" task-level security credentials
  * @cred: the credentials in question
  *
@@ -1117,6 +1133,18 @@
 }
 
 /**
+ * smack_cred_transfer - Transfer the old credentials to the new credentials
+ * @new: the new credentials
+ * @old: the original credentials
+ *
+ * Fill in a set of blank credentials from another set of credentials.
+ */
+static void smack_cred_transfer(struct cred *new, const struct cred *old)
+{
+	new->security = old->security;
+}
+
+/**
  * smack_kernel_act_as - Set the subjective context in a set of credentials
  * @new: points to the set of credentials to be modified.
  * @secid: specifies the security ID to be set
@@ -1638,6 +1666,7 @@
 
 	if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) {
 		nsp->smk_inode = sp;
+		nsp->smk_flags |= SMK_INODE_INSTANT;
 		return 0;
 	}
 	/*
@@ -2464,7 +2493,7 @@
 	/*
 	 * Perfectly reasonable for this to be NULL
 	 */
-	if (sip == NULL || sip->sin_family != PF_INET)
+	if (sip == NULL || sip->sin_family != AF_INET)
 		return 0;
 
 	return smack_netlabel_send(sock->sk, sip);
@@ -3029,10 +3058,31 @@
 {
 }
 
+static int smack_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+	return smack_inode_setsecurity(inode, XATTR_SMACK_SUFFIX, ctx, ctxlen, 0);
+}
+
+static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+	return __vfs_setxattr_noperm(dentry, XATTR_NAME_SMACK, ctx, ctxlen, 0);
+}
+
+static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+	int len;
+	len = smack_inode_getsecurity(inode, XATTR_SMACK_SUFFIX, ctx, true);
+
+	if (len < 0)
+		return len;
+	*ctxlen = len;
+	return 0;
+}
+
 struct security_operations smack_ops = {
 	.name =				"smack",
 
-	.ptrace_may_access =		smack_ptrace_may_access,
+	.ptrace_access_check =		smack_ptrace_access_check,
 	.ptrace_traceme =		smack_ptrace_traceme,
 	.syslog = 			smack_syslog,
 
@@ -3073,9 +3123,11 @@
 	.file_send_sigiotask = 		smack_file_send_sigiotask,
 	.file_receive = 		smack_file_receive,
 
+	.cred_alloc_blank =		smack_cred_alloc_blank,
 	.cred_free =			smack_cred_free,
 	.cred_prepare =			smack_cred_prepare,
 	.cred_commit =			smack_cred_commit,
+	.cred_transfer =		smack_cred_transfer,
 	.kernel_act_as =		smack_kernel_act_as,
 	.kernel_create_files_as =	smack_kernel_create_files_as,
 	.task_setpgid = 		smack_task_setpgid,
@@ -3155,6 +3207,9 @@
 	.secid_to_secctx = 		smack_secid_to_secctx,
 	.secctx_to_secid = 		smack_secctx_to_secid,
 	.release_secctx = 		smack_release_secctx,
+	.inode_notifysecctx =		smack_inode_notifysecctx,
+	.inode_setsecctx =		smack_inode_setsecctx,
+	.inode_getsecctx =		smack_inode_getsecctx,
 };
 
 
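The new cred_alloc_blank/cred_transfer pair lets core code build a
detached set of credentials in two phases: allocate everything that might
fail up front, then copy without any possibility of error.  A sketch of
the expected calling pattern, assuming the cred_alloc_blank() and
security_transfer_creds() helpers this series adds to the core (the
wrapper function is hypothetical):

	#include <linux/cred.h>
	#include <linux/security.h>

	static struct cred *example_clone_cred(const struct cred *old)
	{
		struct cred *new = cred_alloc_blank();	/* -> .cred_alloc_blank */

		if (!new)
			return NULL;
		security_transfer_creds(new, old);	/* -> .cred_transfer */
		return new;
	}
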
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index fdd1f4b8..3c8bd8e 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -1285,6 +1285,36 @@
 }
 
 /**
+ * tomoyo_delete_domain - Delete a domain.
+ *
+ * @domainname: The name of the domain.
+ *
+ * Returns 0.
+ */
+static int tomoyo_delete_domain(char *domainname)
+{
+	struct tomoyo_domain_info *domain;
+	struct tomoyo_path_info name;
+
+	name.name = domainname;
+	tomoyo_fill_path_info(&name);
+	down_write(&tomoyo_domain_list_lock);
+	/* Is there an active domain? */
+	list_for_each_entry(domain, &tomoyo_domain_list, list) {
+		/* Never delete tomoyo_kernel_domain */
+		if (domain == &tomoyo_kernel_domain)
+			continue;
+		if (domain->is_deleted ||
+		    tomoyo_pathcmp(domain->domainname, &name))
+			continue;
+		domain->is_deleted = true;
+		break;
+	}
+	up_write(&tomoyo_domain_list_lock);
+	return 0;
+}
+
+/**
  * tomoyo_write_domain_policy - Write domain policy.
  *
  * @head: Pointer to "struct tomoyo_io_buffer".
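
tomoyo_delete_domain() is now static and sits next to its only caller,
tomoyo_write_domain_policy(), which invokes it when userspace prefixes a
domain name with the "delete" keyword on the policy interface, roughly
(the domain name is illustrative):

	# echo 'delete <kernel> /usr/sbin/sshd' > /sys/kernel/security/tomoyo/domain_policy
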
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 6d6ba09..31df541 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -339,8 +339,6 @@
 const char *tomoyo_get_msg(const bool is_enforce);
 /* Convert single path operation to operation name. */
 const char *tomoyo_sp2keyword(const u8 operation);
-/* Delete a domain. */
-int tomoyo_delete_domain(char *data);
 /* Create "alias" entry in exception policy. */
 int tomoyo_write_alias_policy(char *data, const bool is_delete);
 /*
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 1d8b169..fcf52ac 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -717,38 +717,6 @@
 	return tomoyo_update_alias_entry(data, cp, is_delete);
 }
 
-/* Domain create/delete handler. */
-
-/**
- * tomoyo_delete_domain - Delete a domain.
- *
- * @domainname: The name of domain.
- *
- * Returns 0.
- */
-int tomoyo_delete_domain(char *domainname)
-{
-	struct tomoyo_domain_info *domain;
-	struct tomoyo_path_info name;
-
-	name.name = domainname;
-	tomoyo_fill_path_info(&name);
-	down_write(&tomoyo_domain_list_lock);
-	/* Is there an active domain? */
-	list_for_each_entry(domain, &tomoyo_domain_list, list) {
-		/* Never delete tomoyo_kernel_domain */
-		if (domain == &tomoyo_kernel_domain)
-			continue;
-		if (domain->is_deleted ||
-		    tomoyo_pathcmp(domain->domainname, &name))
-			continue;
-		domain->is_deleted = true;
-		break;
-	}
-	up_write(&tomoyo_domain_list_lock);
-	return 0;
-}
-
 /**
  * tomoyo_find_or_assign_new_domain - Create a domain.
  *
@@ -818,13 +786,11 @@
 /**
  * tomoyo_find_next_domain - Find a domain.
  *
- * @bprm:           Pointer to "struct linux_binprm".
- * @next_domain:    Pointer to pointer to "struct tomoyo_domain_info".
+ * @bprm: Pointer to "struct linux_binprm".
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_find_next_domain(struct linux_binprm *bprm,
-			    struct tomoyo_domain_info **next_domain)
+int tomoyo_find_next_domain(struct linux_binprm *bprm)
 {
 	/*
 	 * This function assumes that the size of buffer returned by
@@ -946,9 +912,11 @@
 		tomoyo_set_domain_flag(old_domain, false,
 				       TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED);
  out:
+	if (!domain)
+		domain = old_domain;
+	bprm->cred->security = domain;
 	tomoyo_free(real_program_name);
 	tomoyo_free(symlink_program_name);
-	*next_domain = domain ? domain : old_domain;
 	tomoyo_free(tmp);
 	return retval;
 }
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 3194d09..9548a09 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -14,6 +14,12 @@
 #include "tomoyo.h"
 #include "realpath.h"
 
+static int tomoyo_cred_alloc_blank(struct cred *new, gfp_t gfp)
+{
+	new->security = NULL;
+	return 0;
+}
+
 static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
 			       gfp_t gfp)
 {
@@ -25,6 +31,15 @@
 	return 0;
 }
 
+static void tomoyo_cred_transfer(struct cred *new, const struct cred *old)
+{
+	/*
+	 * Since "struct tomoyo_domain_info *" is a sharable pointer,
+	 * we don't need to duplicate.
+	 */
+	new->security = old->security;
+}
+
 static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
 {
 	int rc;
@@ -61,14 +76,8 @@
 	 * Execute permission is checked against pathname passed to do_execve()
 	 * using current domain.
 	 */
-	if (!domain) {
-		struct tomoyo_domain_info *next_domain = NULL;
-		int retval = tomoyo_find_next_domain(bprm, &next_domain);
-
-		if (!retval)
-			bprm->cred->security = next_domain;
-		return retval;
-	}
+	if (!domain)
+		return tomoyo_find_next_domain(bprm);
 	/*
 	 * Read permission is checked against interpreters using next domain.
 	 * '1' is the result of open_to_namei_flags(O_RDONLY).
@@ -268,7 +277,9 @@
  */
 static struct security_operations tomoyo_security_ops = {
 	.name                = "tomoyo",
+	.cred_alloc_blank    = tomoyo_cred_alloc_blank,
 	.cred_prepare        = tomoyo_cred_prepare,
+	.cred_transfer	     = tomoyo_cred_transfer,
 	.bprm_set_creds      = tomoyo_bprm_set_creds,
 	.bprm_check_security = tomoyo_bprm_check_security,
 #ifdef CONFIG_SYSCTL
diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h
index 0fd588a..cd6ba0b 100644
--- a/security/tomoyo/tomoyo.h
+++ b/security/tomoyo/tomoyo.h
@@ -31,8 +31,7 @@
 			    struct path *path2);
 int tomoyo_check_rewrite_permission(struct tomoyo_domain_info *domain,
 				    struct file *filp);
-int tomoyo_find_next_domain(struct linux_binprm *bprm,
-			    struct tomoyo_domain_info **next_domain);
+int tomoyo_find_next_domain(struct linux_binprm *bprm);
 
 /* Index numbers for Access Controls. */