Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Martin Schwidefsky:
 "There is only one new feature in this pull for the 4.4 merge window,
  most of it is small enhancements, cleanup and bug fixes:

   - Add the s390 backend for the software dirty bit tracking.  This
     adds two new pgtable functions pte_clear_soft_dirty and
     pmd_clear_soft_dirty which is why there is a hit to
     arch/x86/include/asm/pgtable.h in this pull request.

   - A series of cleanup patches for the AP bus, this includes the
     removal of the support for two outdated crypto cards (PCICC and
     PCICA).

   - The irq handling / signaling on buffer full in the runtime
     instrumentation code is dropped.

   - Some micro optimizations: remove unnecessary memory barriers for a
     couple of functions: [smb_]rmb, [smb_]wmb, atomics, bitops, and for
     spin_unlock.  Use the builtin bswap if available and make
     test_and_set_bit_lock more cache friendly.

   - Statistics and a tracepoint for the diagnose calls to the
     hypervisor.

   - The CPU measurement facility support to sample KVM guests is
     improved.

   - The vector instructions are now always enabled for user space
     processes if the hardware has the vector facility.  This simplifies
     the FPU handling code.  The fpu-internal.h header is split into fpu
     internals, api and types just like x86.

   - Cleanup and improvements for the common I/O layer.

   - Rework udelay to solve a problem with kprobe.  udelay has busy loop
     semantics but still uses an idle processor state for the wait"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (66 commits)
  s390: remove runtime instrumentation interrupts
  s390/cio: de-duplicate subchannel validation
  s390/css: unneeded initialization in for_each_subchannel
  s390/Kconfig: use builtin bswap
  s390/dasd: fix disconnected device with valid path mask
  s390/dasd: fix invalid PAV assignment after suspend/resume
  s390/dasd: fix double free in dasd_eckd_read_conf
  s390/kernel: fix ptrace peek/poke for floating point registers
  s390/cio: move ccw_device_stlck functions
  s390/cio: move ccw_device_call_handler
  s390/topology: reduce per_cpu() invocations
  s390/nmi: reduce size of percpu variable
  s390/nmi: fix terminology
  s390/nmi: remove casts
  s390/nmi: remove pointless error strings
  s390: don't store registers on disabled wait anymore
  s390: get rid of __set_psw_mask()
  s390/fpu: split fpu-internal.h into fpu internals, api, and type headers
  s390/dasd: fix list_del corruption after lcu changes
  s390/spinlock: remove unneeded serializations at unlock
  ...
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1d57000..9b9a2db 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -101,6 +101,7 @@
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select ARCH_WANT_IPC_PARSE_VERSION
@@ -118,6 +119,7 @@
 	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_SOFT_DIRTY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 5eeffee..0450357 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -15,6 +15,7 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
@@ -336,7 +337,7 @@
 
 /* Diagnose 204 functions */
 
-static int diag204(unsigned long subcode, unsigned long size, void *addr)
+static inline int __diag204(unsigned long subcode, unsigned long size, void *addr)
 {
 	register unsigned long _subcode asm("0") = subcode;
 	register unsigned long _size asm("1") = size;
@@ -351,6 +352,12 @@
 	return _size;
 }
 
+static int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X204);
+	return __diag204(subcode, size, addr);
+}
+
 /*
  * For the old diag subcode 4 with simple data format we have to use real
  * memory. If we use subcode 6 or 7 with extended data format, we can (and
@@ -505,6 +512,7 @@
 {
 	int rc = -EOPNOTSUPP;
 
+	diag_stat_inc(DIAG_STAT_X224);
 	asm volatile(
 		"	diag	%1,%2,0x224\n"
 		"0:	lhi	%0,0x0\n"
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 24c747a..0f1927c 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -8,6 +8,7 @@
 
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <asm/diag.h>
 #include <asm/hypfs.h>
 #include "hypfs.h"
 
@@ -18,6 +19,7 @@
  */
 static void diag0c(struct hypfs_diag0c_entry *entry)
 {
+	diag_stat_inc(DIAG_STAT_X00C);
 	asm volatile (
 		"	sam31\n"
 		"	diag	%0,%0,0x0c\n"
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index dd42a26..c9e5c72 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/uaccess.h>
 #include <asm/compat.h>
+#include <asm/diag.h>
 #include <asm/sclp.h>
 #include "hypfs.h"
 
@@ -22,7 +23,7 @@
 
 #define DIAG304_CMD_MAX		2
 
-static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
 {
 	register unsigned long _data asm("2") = (unsigned long) data;
 	register unsigned long _rc asm("3");
@@ -34,6 +35,12 @@
 	return _rc;
 }
 
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+	diag_stat_inc(DIAG_STAT_X304);
+	return __hypfs_sprp_diag304(data, cmd);
+}
+
 static void hypfs_sprp_free(const void *data)
 {
 	free_page((unsigned long) data);
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index afbe079..44feac3 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/timex.h>
 #include "hypfs.h"
@@ -66,6 +67,7 @@
 	memset(parm_list.aci_grp, 0x40, NAME_LEN);
 	rc = -1;
 
+	diag_stat_inc(DIAG_STAT_X2FC);
 	asm volatile(
 		"	diag    %0,%1,0x2fc\n"
 		"0:\n"
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index 16887c5..a6263d4 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -7,6 +7,7 @@
 #ifndef _ASM_S390_APPLDATA_H
 #define _ASM_S390_APPLDATA_H
 
+#include <asm/diag.h>
 #include <asm/io.h>
 
 #define APPLDATA_START_INTERVAL_REC	0x80
@@ -53,6 +54,7 @@
 	parm_list.buffer_length = length;
 	parm_list.product_id_addr = (unsigned long) id;
 	parm_list.buffer_addr = virt_to_phys(buffer);
+	diag_stat_inc(DIAG_STAT_X0DC);
 	asm volatile(
 		"	diag	%1,%0,0xdc"
 		: "=d" (ry)
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 117fa5c..911064a 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -36,7 +36,6 @@
 									\
 	typecheck(atomic_t *, ptr);					\
 	asm volatile(							\
-		__barrier						\
 		op_string "	%0,%2,%1\n"				\
 		__barrier						\
 		: "=d" (old_val), "+Q" ((ptr)->counter)			\
@@ -180,7 +179,6 @@
 									\
 	typecheck(atomic64_t *, ptr);					\
 	asm volatile(							\
-		__barrier						\
 		op_string "	%0,%2,%1\n"				\
 		__barrier						\
 		: "=d" (old_val), "+Q" ((ptr)->counter)			\
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index d48fe01..d68e11e 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -22,10 +22,10 @@
 
 #define mb() do {  asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
 
-#define rmb()				mb()
-#define wmb()				mb()
-#define dma_rmb()			rmb()
-#define dma_wmb()			wmb()
+#define rmb()				barrier()
+#define wmb()				barrier()
+#define dma_rmb()			mb()
+#define dma_wmb()			mb()
 #define smp_mb()			mb()
 #define smp_rmb()			rmb()
 #define smp_wmb()			wmb()
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 9b68e98..8043f10 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -11,30 +11,25 @@
  * big-endian system because, unlike little endian, the number of each
  * bit depends on the word size.
  *
- * The bitop functions are defined to work on unsigned longs, so for an
- * s390x system the bits end up numbered:
+ * The bitop functions are defined to work on unsigned longs, so the bits
+ * end up numbered:
  *   |63..............0|127............64|191...........128|255...........192|
- * and on s390:
- *   |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
  *
  * There are a few little-endian macros used mostly for filesystem
- * bitmaps, these work on similar bit arrays layouts, but
- * byte-oriented:
+ * bitmaps, these work on similar bit array layouts, but byte-oriented:
  *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
  *
- * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
- * number field needs to be reversed compared to the big-endian bit
- * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ * The main difference is that bit 3-5 in the bit number field needs to be
+ * reversed compared to the big-endian bit fields. This can be achieved by
+ * XOR with 0x38.
  *
- * We also have special functions which work with an MSB0 encoding:
- * on an s390x system the bits are numbered:
+ * We also have special functions which work with an MSB0 encoding.
+ * The bits are numbered:
  *   |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
  *
- * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit
- * number field needs to be reversed compared to the LSB0 encoded bit
- * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b).
+ * The main difference is that bit 0-63 in the bit number field needs to be
+ * reversed compared to the LSB0 encoded bit fields. This can be achieved by
+ * XOR with 0x3f.
  *
  */
 
@@ -64,7 +59,6 @@
 								\
 	typecheck(unsigned long *, (__addr));			\
 	asm volatile(						\
-		__barrier					\
 		__op_string "	%0,%2,%1\n"			\
 		__barrier					\
 		: "=d" (__old),	"+Q" (*(__addr))		\
@@ -276,12 +270,32 @@
 	return (*addr >> (nr & 7)) & 1;
 }
 
+static inline int test_and_set_bit_lock(unsigned long nr,
+					volatile unsigned long *ptr)
+{
+	if (test_bit(nr, ptr))
+		return 1;
+	return test_and_set_bit(nr, ptr);
+}
+
+static inline void clear_bit_unlock(unsigned long nr,
+				    volatile unsigned long *ptr)
+{
+	smp_mb__before_atomic();
+	clear_bit(nr, ptr);
+}
+
+static inline void __clear_bit_unlock(unsigned long nr,
+				      volatile unsigned long *ptr)
+{
+	smp_mb();
+	__clear_bit(nr, ptr);
+}
+
 /*
  * Functions which use MSB0 bit numbering.
- * On an s390x system the bits are numbered:
+ * The bits are numbered:
  *   |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
  */
 unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
 unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
@@ -446,7 +460,6 @@
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/le.h>
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 0963392..0c5d8ee 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -5,6 +5,7 @@
 #define _ASM_S390_CIO_H_
 
 #include <linux/spinlock.h>
+#include <linux/bitops.h>
 #include <asm/types.h>
 
 #define LPM_ANYPATH 0xff
@@ -296,6 +297,15 @@
 	return 0;
 }
 
+/**
+ * pathmask_to_pos() - find the position of the left-most bit in a pathmask
+ * @mask: pathmask with at least one bit set
+ */
+static inline u8 pathmask_to_pos(u8 mask)
+{
+	return 8 - ffs(mask);
+}
+
 void channel_subsystem_reinit(void);
 extern void css_schedule_reprobe(void);
 
diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h
index 806eac1..ed2630c 100644
--- a/arch/s390/include/asm/cmb.h
+++ b/arch/s390/include/asm/cmb.h
@@ -6,6 +6,7 @@
 struct ccw_device;
 extern int enable_cmf(struct ccw_device *cdev);
 extern int disable_cmf(struct ccw_device *cdev);
+extern int __disable_cmf(struct ccw_device *cdev);
 extern u64 cmf_read(struct ccw_device *cdev, int index);
 extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data);
 
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 411464f..24ea694 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -32,7 +32,7 @@
 	__old;								\
 })
 
-#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn)		\
+#define __cmpxchg_double(p1, p2, o1, o2, n1, n2)			\
 ({									\
 	register __typeof__(*(p1)) __old1 asm("2") = (o1);		\
 	register __typeof__(*(p2)) __old2 asm("3") = (o2);		\
@@ -40,7 +40,7 @@
 	register __typeof__(*(p2)) __new2 asm("5") = (n2);		\
 	int cc;								\
 	asm volatile(							\
-			insn   " %[old],%[new],%[ptr]\n"		\
+		"	cdsg	%[old],%[new],%[ptr]\n"			\
 		"	ipm	%[cc]\n"				\
 		"	srl	%[cc],28"				\
 		: [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2)	\
@@ -50,30 +50,6 @@
 	!cc;								\
 })
 
-#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \
-	__cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds")
-
-#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \
-	__cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg")
-
-extern void __cmpxchg_double_called_with_bad_pointer(void);
-
-#define __cmpxchg_double(p1, p2, o1, o2, n1, n2)			\
-({									\
-	int __ret;							\
-	switch (sizeof(*(p1))) {					\
-	case 4:								\
-		__ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2);	\
-		break;							\
-	case 8:								\
-		__ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2);	\
-		break;							\
-	default:							\
-		__cmpxchg_double_called_with_bad_pointer();		\
-	}								\
-	__ret;								\
-})
-
 #define cmpxchg_double(p1, p2, o1, o2, n1, n2)				\
 ({									\
 	__typeof__(p1) __p1 = (p1);					\
@@ -81,7 +57,7 @@
 	BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));			\
 	BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));			\
 	VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
-	__cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2);			\
+	__cmpxchg_double(__p1, __p2, o1, o2, n1, n2);			\
 })
 
 #define system_has_cmpxchg_double()	1
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 5243a86..9dd04b9 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -22,15 +22,10 @@
 #define CPU_MF_INT_SF_LSDA	(1 << 22)	/* loss of sample data alert */
 #define CPU_MF_INT_CF_CACA	(1 <<  7)	/* counter auth. change alert */
 #define CPU_MF_INT_CF_LCDA	(1 <<  6)	/* loss of counter data alert */
-#define CPU_MF_INT_RI_HALTED	(1 <<  5)	/* run-time instr. halted */
-#define CPU_MF_INT_RI_BUF_FULL	(1 <<  4)	/* run-time instr. program
-						   buffer full */
-
 #define CPU_MF_INT_CF_MASK	(CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
 #define CPU_MF_INT_SF_MASK	(CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE|	\
 				 CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA|	\
 				 CPU_MF_INT_SF_LSDA)
-#define CPU_MF_INT_RI_MASK	(CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL)
 
 /* CPU measurement facility support */
 static inline int cpum_cf_avail(void)
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 17a3735..d7697ab 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -46,8 +46,6 @@
 	__ctl_load(reg, cr, cr);
 }
 
-void __ctl_set_vx(void);
-
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 7e91c58..5fac921 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -8,6 +8,34 @@
 #ifndef _ASM_S390_DIAG_H
 #define _ASM_S390_DIAG_H
 
+#include <linux/percpu.h>
+
+enum diag_stat_enum {
+	DIAG_STAT_X008,
+	DIAG_STAT_X00C,
+	DIAG_STAT_X010,
+	DIAG_STAT_X014,
+	DIAG_STAT_X044,
+	DIAG_STAT_X064,
+	DIAG_STAT_X09C,
+	DIAG_STAT_X0DC,
+	DIAG_STAT_X204,
+	DIAG_STAT_X210,
+	DIAG_STAT_X224,
+	DIAG_STAT_X250,
+	DIAG_STAT_X258,
+	DIAG_STAT_X288,
+	DIAG_STAT_X2C4,
+	DIAG_STAT_X2FC,
+	DIAG_STAT_X304,
+	DIAG_STAT_X308,
+	DIAG_STAT_X500,
+	NR_DIAG_STAT
+};
+
+void diag_stat_inc(enum diag_stat_enum nr);
+void diag_stat_inc_norecursion(enum diag_stat_enum nr);
+
 /*
  * Diagnose 10: Release page range
  */
@@ -18,6 +46,7 @@
 	start_addr = start_pfn << PAGE_SHIFT;
 	end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
 
+	diag_stat_inc(DIAG_STAT_X010);
 	asm volatile(
 		"0:	diag	%0,%1,0x10\n"
 		"1:\n"
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
index f7e5c36..105f90e 100644
--- a/arch/s390/include/asm/etr.h
+++ b/arch/s390/include/asm/etr.h
@@ -211,8 +211,9 @@
 #define ETR_PTFF_SGS	0x43	/* set gross steering rate */
 
 /* Functions needed by the machine check handler */
-void etr_switch_to_local(void);
-void etr_sync_check(void);
+int etr_switch_to_local(void);
+int etr_sync_check(void);
+void etr_queue_work(void);
 
 /* notifier for syncs */
 extern struct atomic_notifier_head s390_epoch_delta_notifier;
@@ -253,7 +254,8 @@
 } __attribute__ ((packed));
 
 /* Functions needed by the machine check handler */
-void stp_sync_check(void);
-void stp_island_check(void);
+int stp_sync_check(void);
+int stp_island_check(void);
+void stp_queue_work(void);
 
 #endif /* __S390_ETR_H */
diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
deleted file mode 100644
index 55dc2c0..0000000
--- a/arch/s390/include/asm/fpu-internal.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * General floating pointer and vector register helpers
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_INTERNAL_H
-#define _ASM_S390_FPU_INTERNAL_H
-
-#define FPU_USE_VX		1	/* Vector extension is active */
-
-#ifndef __ASSEMBLY__
-
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <asm/linkage.h>
-#include <asm/ctl_reg.h>
-#include <asm/sigcontext.h>
-
-struct fpu {
-	__u32 fpc;			/* Floating-point control */
-	__u32 flags;
-	union {
-		void *regs;
-		freg_t *fprs;		/* Floating-point register save area */
-		__vector128 *vxrs;	/* Vector register save area */
-	};
-};
-
-void save_fpu_regs(void);
-
-#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
-#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
-
-/* VX array structure for address operand constraints in inline assemblies */
-struct vx_array { __vector128 _[__NUM_VXRS]; };
-
-static inline int test_fp_ctl(u32 fpc)
-{
-	u32 orig_fpc;
-	int rc;
-
-	asm volatile(
-		"	efpc    %1\n"
-		"	sfpc	%2\n"
-		"0:	sfpc	%1\n"
-		"	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (orig_fpc)
-		: "d" (fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_vx_regs_safe(__vector128 *vxrs)
-{
-	unsigned long cr0, flags;
-
-	flags = arch_local_irq_save();
-	__ctl_store(cr0, 0, 0);
-	__ctl_set_bit(0, 17);
-	__ctl_set_bit(0, 18);
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(struct vx_array *) vxrs) : : "1");
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
-}
-
-static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
-{
-	int i;
-
-	for (i = 0; i < __NUM_FPRS; i++)
-		fprs[i] = *(freg_t *)(vxrs + i);
-}
-
-static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
-{
-	int i;
-
-	for (i = 0; i < __NUM_FPRS; i++)
-		*(freg_t *)(vxrs + i) = fprs[i];
-}
-
-static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
-{
-	fpregs->pad = 0;
-	if (is_vx_fpu(fpu))
-		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
-	else
-		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
-		       sizeof(fpregs->fprs));
-}
-
-static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
-{
-	if (is_vx_fpu(fpu))
-		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
-	else
-		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
-		       sizeof(fpregs->fprs));
-}
-
-#endif
-
-#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
new file mode 100644
index 0000000..5e04f3c
--- /dev/null
+++ b/arch/s390/include/asm/fpu/api.h
@@ -0,0 +1,30 @@
+/*
+ * In-kernel FPU support functions
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_API_H
+#define _ASM_S390_FPU_API_H
+
+void save_fpu_regs(void);
+
+static inline int test_fp_ctl(u32 fpc)
+{
+	u32 orig_fpc;
+	int rc;
+
+	asm volatile(
+		"	efpc    %1\n"
+		"	sfpc	%2\n"
+		"0:	sfpc	%1\n"
+		"	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=d" (orig_fpc)
+		: "d" (fpc), "0" (-EINVAL));
+	return rc;
+}
+
+#endif /* _ASM_S390_FPU_API_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
new file mode 100644
index 0000000..2559b16
--- /dev/null
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -0,0 +1,67 @@
+/*
+ * FPU state and register content conversion primitives
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#include <linux/string.h>
+#include <asm/ctl_reg.h>
+#include <asm/fpu/types.h>
+
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+	unsigned long cr0, flags;
+
+	flags = arch_local_irq_save();
+	__ctl_store(cr0, 0, 0);
+	__ctl_set_bit(0, 17);
+	__ctl_set_bit(0, 18);
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(struct vx_array *) vxrs) : : "1");
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		fprs[i] = *(freg_t *)(vxrs + i);
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		*(freg_t *)(vxrs + i) = fprs[i];
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpregs->pad = 0;
+	if (MACHINE_HAS_VX)
+		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+	else
+		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	if (MACHINE_HAS_VX)
+		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+	else
+		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
new file mode 100644
index 0000000..14a8b0c
--- /dev/null
+++ b/arch/s390/include/asm/fpu/types.h
@@ -0,0 +1,25 @@
+/*
+ * FPU data structures
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_TYPES_H
+#define _ASM_S390_FPU_TYPES_H
+
+#include <asm/sigcontext.h>
+
+struct fpu {
+	__u32 fpc;			/* Floating-point control */
+	union {
+		void *regs;
+		freg_t *fprs;		/* Floating-point register save area */
+		__vector128 *vxrs;	/* Vector register save area */
+	};
+};
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 113cd96..51ff96d 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -24,4 +24,6 @@
 extern struct device_attribute dev_attr_idle_count;
 extern struct device_attribute dev_attr_idle_time_us;
 
+void psw_idle(struct s390_idle_data *, unsigned long);
+
 #endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index ff95d15..f97b055 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -47,7 +47,6 @@
 	IRQEXT_IUC,
 	IRQEXT_CMS,
 	IRQEXT_CMC,
-	IRQEXT_CMR,
 	IRQEXT_FTP,
 	IRQIO_CIO,
 	IRQIO_QAI,
@@ -96,6 +95,19 @@
 	IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
 };
 
+#define CR0_IRQ_SUBCLASS_MASK					  \
+	((1UL << (63 - 30))  /* Warning Track */		| \
+	 (1UL << (63 - 48))  /* Malfunction Alert */		| \
+	 (1UL << (63 - 49))  /* Emergency Signal */		| \
+	 (1UL << (63 - 50))  /* External Call */		| \
+	 (1UL << (63 - 52))  /* Clock Comparator */		| \
+	 (1UL << (63 - 53))  /* CPU Timer */			| \
+	 (1UL << (63 - 54))  /* Service Signal */		| \
+	 (1UL << (63 - 57))  /* Interrupt Key */		| \
+	 (1UL << (63 - 58))  /* Measurement Alert */		| \
+	 (1UL << (63 - 59))  /* Timing Alert */			| \
+	 (1UL << (63 - 62))) /* IUCV */
+
 void irq_subclass_register(enum irq_subclass subclass);
 void irq_subclass_unregister(enum irq_subclass subclass);
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8ced426..7f65430 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -22,7 +22,7 @@
 #include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include <asm/isc.h>
 
 #define KVM_MAX_VCPUS 64
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index e0f8423..4139305 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -27,10 +27,9 @@
 #define __S390_KVM_PARA_H
 
 #include <uapi/asm/kvm_para.h>
+#include <asm/diag.h>
 
-
-
-static inline long kvm_hypercall0(unsigned long nr)
+static inline long __kvm_hypercall0(unsigned long nr)
 {
 	register unsigned long __nr asm("1") = nr;
 	register long __rc asm("2");
@@ -40,7 +39,13 @@
 	return __rc;
 }
 
-static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+static inline long kvm_hypercall0(unsigned long nr)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall0(nr);
+}
+
+static inline long __kvm_hypercall1(unsigned long nr, unsigned long p1)
 {
 	register unsigned long __nr asm("1") = nr;
 	register unsigned long __p1 asm("2") = p1;
@@ -51,7 +56,13 @@
 	return __rc;
 }
 
-static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall1(nr, p1);
+}
+
+static inline long __kvm_hypercall2(unsigned long nr, unsigned long p1,
 			       unsigned long p2)
 {
 	register unsigned long __nr asm("1") = nr;
@@ -65,7 +76,14 @@
 	return __rc;
 }
 
-static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+			       unsigned long p2)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall2(nr, p1, p2);
+}
+
+static inline long __kvm_hypercall3(unsigned long nr, unsigned long p1,
 			       unsigned long p2, unsigned long p3)
 {
 	register unsigned long __nr asm("1") = nr;
@@ -80,8 +98,14 @@
 	return __rc;
 }
 
+static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall3(nr, p1, p2, p3);
+}
 
-static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+static inline long __kvm_hypercall4(unsigned long nr, unsigned long p1,
 			       unsigned long p2, unsigned long p3,
 			       unsigned long p4)
 {
@@ -98,7 +122,15 @@
 	return __rc;
 }
 
-static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall4(nr, p1, p2, p3, p4);
+}
+
+static inline long __kvm_hypercall5(unsigned long nr, unsigned long p1,
 			       unsigned long p2, unsigned long p3,
 			       unsigned long p4, unsigned long p5)
 {
@@ -116,7 +148,15 @@
 	return __rc;
 }
 
-static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4, unsigned long p5)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall5(nr, p1, p2, p3, p4, p5);
+}
+
+static inline long __kvm_hypercall6(unsigned long nr, unsigned long p1,
 			       unsigned long p2, unsigned long p3,
 			       unsigned long p4, unsigned long p5,
 			       unsigned long p6)
@@ -137,6 +177,15 @@
 	return __rc;
 }
 
+static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4, unsigned long p5,
+			       unsigned long p6)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall6(nr, p1, p2, p3, p4, p5, p6);
+}
+
 /* kvm on s390 is always paravirtualization enabled */
 static inline int kvm_para_available(void)
 {
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 663f23e..afe1cfe 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -67,7 +67,7 @@
 	__u8	pad_0x00c4[0x00c8-0x00c4];	/* 0x00c4 */
 	__u32	stfl_fac_list;			/* 0x00c8 */
 	__u8	pad_0x00cc[0x00e8-0x00cc];	/* 0x00cc */
-	__u32	mcck_interruption_code[2];	/* 0x00e8 */
+	__u64	mcck_interruption_code;		/* 0x00e8 */
 	__u8	pad_0x00f0[0x00f4-0x00f0];	/* 0x00f0 */
 	__u32	external_damage_code;		/* 0x00f4 */
 	__u64	failing_storage_address;	/* 0x00f8 */
@@ -132,7 +132,14 @@
 	/* Address space pointer. */
 	__u64	kernel_asce;			/* 0x0358 */
 	__u64	user_asce;			/* 0x0360 */
-	__u64	current_pid;			/* 0x0368 */
+
+	/*
+	 * The lpp and current_pid fields form a
+	 * 64-bit value that is set as program
+	 * parameter with the LPP instruction.
+	 */
+	__u32	lpp;				/* 0x0368 */
+	__u32	current_pid;			/* 0x036c */
 
 	/* SMP info area */
 	__u32	cpu_nr;				/* 0x0370 */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 3027a5a..b75fd91 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -11,51 +11,62 @@
 #ifndef _ASM_S390_NMI_H
 #define _ASM_S390_NMI_H
 
+#include <linux/const.h>
 #include <linux/types.h>
 
-struct mci {
-	__u32 sd :  1; /* 00 system damage */
-	__u32 pd :  1; /* 01 instruction-processing damage */
-	__u32 sr :  1; /* 02 system recovery */
-	__u32	 :  1; /* 03 */
-	__u32 cd :  1; /* 04 timing-facility damage */
-	__u32 ed :  1; /* 05 external damage */
-	__u32	 :  1; /* 06 */
-	__u32 dg :  1; /* 07 degradation */
-	__u32 w  :  1; /* 08 warning pending */
-	__u32 cp :  1; /* 09 channel-report pending */
-	__u32 sp :  1; /* 10 service-processor damage */
-	__u32 ck :  1; /* 11 channel-subsystem damage */
-	__u32	 :  2; /* 12-13 */
-	__u32 b  :  1; /* 14 backed up */
-	__u32	 :  1; /* 15 */
-	__u32 se :  1; /* 16 storage error uncorrected */
-	__u32 sc :  1; /* 17 storage error corrected */
-	__u32 ke :  1; /* 18 storage-key error uncorrected */
-	__u32 ds :  1; /* 19 storage degradation */
-	__u32 wp :  1; /* 20 psw mwp validity */
-	__u32 ms :  1; /* 21 psw mask and key validity */
-	__u32 pm :  1; /* 22 psw program mask and cc validity */
-	__u32 ia :  1; /* 23 psw instruction address validity */
-	__u32 fa :  1; /* 24 failing storage address validity */
-	__u32 vr :  1; /* 25 vector register validity */
-	__u32 ec :  1; /* 26 external damage code validity */
-	__u32 fp :  1; /* 27 floating point register validity */
-	__u32 gr :  1; /* 28 general register validity */
-	__u32 cr :  1; /* 29 control register validity */
-	__u32	 :  1; /* 30 */
-	__u32 st :  1; /* 31 storage logical validity */
-	__u32 ie :  1; /* 32 indirect storage error */
-	__u32 ar :  1; /* 33 access register validity */
-	__u32 da :  1; /* 34 delayed access exception */
-	__u32	 :  7; /* 35-41 */
-	__u32 pr :  1; /* 42 tod programmable register validity */
-	__u32 fc :  1; /* 43 fp control register validity */
-	__u32 ap :  1; /* 44 ancillary report */
-	__u32	 :  1; /* 45 */
-	__u32 ct :  1; /* 46 cpu timer validity */
-	__u32 cc :  1; /* 47 clock comparator validity */
-	__u32	 : 16; /* 47-63 */
+#define MCCK_CODE_SYSTEM_DAMAGE		_BITUL(63)
+#define MCCK_CODE_CPU_TIMER_VALID	_BITUL(63 - 46)
+#define MCCK_CODE_PSW_MWP_VALID		_BITUL(63 - 20)
+#define MCCK_CODE_PSW_IA_VALID		_BITUL(63 - 23)
+
+#ifndef __ASSEMBLY__
+
+union mci {
+	unsigned long val;
+	struct {
+		u64 sd :  1; /* 00 system damage */
+		u64 pd :  1; /* 01 instruction-processing damage */
+		u64 sr :  1; /* 02 system recovery */
+		u64    :  1; /* 03 */
+		u64 cd :  1; /* 04 timing-facility damage */
+		u64 ed :  1; /* 05 external damage */
+		u64    :  1; /* 06 */
+		u64 dg :  1; /* 07 degradation */
+		u64 w  :  1; /* 08 warning pending */
+		u64 cp :  1; /* 09 channel-report pending */
+		u64 sp :  1; /* 10 service-processor damage */
+		u64 ck :  1; /* 11 channel-subsystem damage */
+		u64    :  2; /* 12-13 */
+		u64 b  :  1; /* 14 backed up */
+		u64    :  1; /* 15 */
+		u64 se :  1; /* 16 storage error uncorrected */
+		u64 sc :  1; /* 17 storage error corrected */
+		u64 ke :  1; /* 18 storage-key error uncorrected */
+		u64 ds :  1; /* 19 storage degradation */
+		u64 wp :  1; /* 20 psw mwp validity */
+		u64 ms :  1; /* 21 psw mask and key validity */
+		u64 pm :  1; /* 22 psw program mask and cc validity */
+		u64 ia :  1; /* 23 psw instruction address validity */
+		u64 fa :  1; /* 24 failing storage address validity */
+		u64 vr :  1; /* 25 vector register validity */
+		u64 ec :  1; /* 26 external damage code validity */
+		u64 fp :  1; /* 27 floating point register validity */
+		u64 gr :  1; /* 28 general register validity */
+		u64 cr :  1; /* 29 control register validity */
+		u64    :  1; /* 30 */
+		u64 st :  1; /* 31 storage logical validity */
+		u64 ie :  1; /* 32 indirect storage error */
+		u64 ar :  1; /* 33 access register validity */
+		u64 da :  1; /* 34 delayed access exception */
+		u64    :  7; /* 35-41 */
+		u64 pr :  1; /* 42 tod programmable register validity */
+		u64 fc :  1; /* 43 fp control register validity */
+		u64 ap :  1; /* 44 ancillary report */
+		u64    :  1; /* 45 */
+		u64 ct :  1; /* 46 cpu timer validity */
+		u64 cc :  1; /* 47 clock comparator validity */
+		u64    : 16; /* 47-63 */
+	};
 };
 
 struct pt_regs;
@@ -63,4 +74,5 @@
 extern void s390_handle_mcck(void);
 extern void s390_do_machine_check(struct pt_regs *regs);
 
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index bdb2f51..024f85f 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -193,9 +193,15 @@
 #define _PAGE_UNUSED	0x080		/* SW bit for pgste usage state */
 #define __HAVE_ARCH_PTE_SPECIAL
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY 0x002		/* SW pte soft dirty bit */
+#else
+#define _PAGE_SOFT_DIRTY 0x000
+#endif
+
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
-				 _PAGE_YOUNG)
+				 _PAGE_YOUNG | _PAGE_SOFT_DIRTY)
 
 /*
  * handle_pte_fault uses pte_present and pte_none to find out the pte type
@@ -285,6 +291,12 @@
 #define _SEGMENT_ENTRY_READ	0x0002	/* SW segment read bit */
 #define _SEGMENT_ENTRY_WRITE	0x0001	/* SW segment write bit */
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
+#else
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
+#endif
+
 /*
  * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
  *				dy..R...I...wr
@@ -589,6 +601,43 @@
 }
 #endif
 
+static inline int pte_soft_dirty(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_SOFT_DIRTY;
+}
+#define pte_swp_soft_dirty pte_soft_dirty
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_SOFT_DIRTY;
+	return pte;
+}
+#define pte_swp_mksoft_dirty pte_mksoft_dirty
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	pte_val(pte) &= ~_PAGE_SOFT_DIRTY;
+	return pte;
+}
+#define pte_swp_clear_soft_dirty pte_clear_soft_dirty
+
+static inline int pmd_soft_dirty(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY;
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+	pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY;
+	return pmd;
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY;
+	return pmd;
+}
+
 static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
 	unsigned long new = 0;
@@ -889,7 +938,7 @@
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	pte_val(pte) |= _PAGE_DIRTY;
+	pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY;
 	if (pte_val(pte) & _PAGE_WRITE)
 		pte_val(pte) &= ~_PAGE_PROTECT;
 	return pte;
@@ -1218,8 +1267,10 @@
 					pte_t entry, int dirty)
 {
 	pgste_t pgste;
+	pte_t oldpte;
 
-	if (pte_same(*ptep, entry))
+	oldpte = *ptep;
+	if (pte_same(oldpte, entry))
 		return 0;
 	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
@@ -1229,7 +1280,8 @@
 	ptep_flush_direct(vma->vm_mm, address, ptep);
 
 	if (mm_has_pgste(vma->vm_mm)) {
-		pgste_set_key(ptep, pgste, entry, vma->vm_mm);
+		if (pte_val(oldpte) & _PAGE_INVALID)
+			pgste_set_key(ptep, pgste, entry, vma->vm_mm);
 		pgste = pgste_set_pte(ptep, pgste, entry);
 		pgste_set_unlock(ptep, pgste);
 	} else
@@ -1340,7 +1392,8 @@
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
 {
 	if (pmd_large(pmd)) {
-		pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY;
+		pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
+				_SEGMENT_ENTRY_SOFT_DIRTY;
 		if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
 			pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
 	}
@@ -1371,7 +1424,8 @@
 	if (pmd_large(pmd)) {
 		pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
 			_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
-			_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT;
+			_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT |
+			_SEGMENT_ENTRY_SOFT_DIRTY;
 		pmd_val(pmd) |= massage_pgprot_pmd(newprot);
 		if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
 			pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 085fb0d..b16c3d0 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -11,15 +11,19 @@
 #ifndef __ASM_S390_PROCESSOR_H
 #define __ASM_S390_PROCESSOR_H
 
+#include <linux/const.h>
+
 #define CIF_MCCK_PENDING	0	/* machine check handling is pending */
 #define CIF_ASCE		1	/* user asce needs fixup / uaccess */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
-#define CIF_FPU			3	/* restore vector registers */
+#define CIF_FPU			3	/* restore FPU registers */
+#define CIF_IGNORE_IRQ		4	/* ignore interrupt (for udelay) */
 
-#define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
-#define _CIF_ASCE		(1<<CIF_ASCE)
-#define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
-#define _CIF_FPU		(1<<CIF_FPU)
+#define _CIF_MCCK_PENDING	_BITUL(CIF_MCCK_PENDING)
+#define _CIF_ASCE		_BITUL(CIF_ASCE)
+#define _CIF_NOHZ_DELAY		_BITUL(CIF_NOHZ_DELAY)
+#define _CIF_FPU		_BITUL(CIF_FPU)
+#define _CIF_IGNORE_IRQ		_BITUL(CIF_IGNORE_IRQ)
 
 #ifndef __ASSEMBLY__
 
@@ -30,21 +34,22 @@
 #include <asm/ptrace.h>
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/internal.h>
 
 static inline void set_cpu_flag(int flag)
 {
-	S390_lowcore.cpu_flags |= (1U << flag);
+	S390_lowcore.cpu_flags |= (1UL << flag);
 }
 
 static inline void clear_cpu_flag(int flag)
 {
-	S390_lowcore.cpu_flags &= ~(1U << flag);
+	S390_lowcore.cpu_flags &= ~(1UL << flag);
 }
 
 static inline int test_cpu_flag(int flag)
 {
-	return !!(S390_lowcore.cpu_flags & (1U << flag));
+	return !!(S390_lowcore.cpu_flags & (1UL << flag));
 }
 
 #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@@ -102,7 +107,6 @@
 	struct list_head list;
 	/* cpu runtime instrumentation */
 	struct runtime_instr_cb *ri_cb;
-	int ri_signum;
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
 };
 
@@ -139,8 +143,10 @@
 
 #define ARCH_MIN_TASKALIGN	8
 
+extern __vector128 init_task_fpu_regs[__NUM_VXRS];
 #define INIT_THREAD {							\
 	.ksp = sizeof(init_stack) + (unsigned long) &init_stack,	\
+	.fpu.regs = (void *)&init_task_fpu_regs,			\
 }
 
 /*
@@ -217,7 +223,7 @@
  * Set PSW mask to specified value, while leaving the
  * PSW addr pointing to the next instruction.
  */
-static inline void __load_psw_mask (unsigned long mask)
+static inline void __load_psw_mask(unsigned long mask)
 {
 	unsigned long addr;
 	psw_t psw;
@@ -243,6 +249,16 @@
 	return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
 }
 
+static inline void local_mcck_enable(void)
+{
+	__load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
+}
+
+static inline void local_mcck_disable(void)
+{
+	__load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK);
+}
+
 /*
  * Rewind PSW instruction address by specified number of bytes.
  */
@@ -266,66 +282,15 @@
  */
 static inline void __noreturn disabled_wait(unsigned long code)
 {
-        unsigned long ctl_buf;
-        psw_t dw_psw;
+	psw_t psw;
 
-	dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
-        dw_psw.addr = code;
-        /* 
-         * Store status and then load disabled wait psw,
-         * the processor is dead afterwards
-         */
-	asm volatile(
-		"	stctg	0,0,0(%2)\n"
-		"	ni	4(%2),0xef\n"	/* switch off protection */
-		"	lctlg	0,0,0(%2)\n"
-		"	lghi	1,0x1000\n"
-		"	stpt	0x328(1)\n"	/* store timer */
-		"	stckc	0x330(1)\n"	/* store clock comparator */
-		"	stpx	0x318(1)\n"	/* store prefix register */
-		"	stam	0,15,0x340(1)\n"/* store access registers */
-		"	stfpc	0x31c(1)\n"	/* store fpu control */
-		"	std	0,0x200(1)\n"	/* store f0 */
-		"	std	1,0x208(1)\n"	/* store f1 */
-		"	std	2,0x210(1)\n"	/* store f2 */
-		"	std	3,0x218(1)\n"	/* store f3 */
-		"	std	4,0x220(1)\n"	/* store f4 */
-		"	std	5,0x228(1)\n"	/* store f5 */
-		"	std	6,0x230(1)\n"	/* store f6 */
-		"	std	7,0x238(1)\n"	/* store f7 */
-		"	std	8,0x240(1)\n"	/* store f8 */
-		"	std	9,0x248(1)\n"	/* store f9 */
-		"	std	10,0x250(1)\n"	/* store f10 */
-		"	std	11,0x258(1)\n"	/* store f11 */
-		"	std	12,0x260(1)\n"	/* store f12 */
-		"	std	13,0x268(1)\n"	/* store f13 */
-		"	std	14,0x270(1)\n"	/* store f14 */
-		"	std	15,0x278(1)\n"	/* store f15 */
-		"	stmg	0,15,0x280(1)\n"/* store general registers */
-		"	stctg	0,15,0x380(1)\n"/* store control registers */
-		"	oi	0x384(1),0x10\n"/* fake protection bit */
-		"	lpswe	0(%1)"
-		: "=m" (ctl_buf)
-		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1");
+	psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
+	psw.addr = code;
+	__load_psw(psw);
 	while (1);
 }
 
 /*
- * Use to set psw mask except for the first byte which
- * won't be changed by this function.
- */
-static inline void
-__set_psw_mask(unsigned long mask)
-{
-	__load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8)));
-}
-
-#define local_mcck_enable() \
-	__set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK)
-#define local_mcck_disable() \
-	__set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT)
-
-/*
  * Basic Machine Check/Program Check Handler.
  */
 
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 6feda25..37cbc50 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -6,13 +6,14 @@
 #ifndef _S390_PTRACE_H
 #define _S390_PTRACE_H
 
+#include <linux/const.h>
 #include <uapi/asm/ptrace.h>
 
 #define PIF_SYSCALL		0	/* inside a system call */
 #define PIF_PER_TRAP		1	/* deliver sigtrap on return to user */
 
-#define _PIF_SYSCALL		(1<<PIF_SYSCALL)
-#define _PIF_PER_TRAP		(1<<PIF_PER_TRAP)
+#define _PIF_SYSCALL		_BITUL(PIF_SYSCALL)
+#define _PIF_PER_TRAP		_BITUL(PIF_PER_TRAP)
 
 #ifndef __ASSEMBLY__
 
@@ -128,17 +129,17 @@
 
 static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
 {
-	regs->flags |= (1U << flag);
+	regs->flags |= (1UL << flag);
 }
 
 static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
 {
-	regs->flags &= ~(1U << flag);
+	regs->flags &= ~(1UL << flag);
 }
 
 static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
 {
-	return !!(regs->flags & (1U << flag));
+	return !!(regs->flags & (1UL << flag));
 }
 
 /*
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index b8ffc1b..2353766 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -5,11 +5,38 @@
 #ifndef _ASM_S390_SETUP_H
 #define _ASM_S390_SETUP_H
 
+#include <linux/const.h>
 #include <uapi/asm/setup.h>
 
 
 #define PARMAREA		0x10400
 
+/*
+ * Machine features detected in head.S
+ */
+
+#define MACHINE_FLAG_VM		_BITUL(0)
+#define MACHINE_FLAG_IEEE	_BITUL(1)
+#define MACHINE_FLAG_CSP	_BITUL(2)
+#define MACHINE_FLAG_MVPG	_BITUL(3)
+#define MACHINE_FLAG_DIAG44	_BITUL(4)
+#define MACHINE_FLAG_IDTE	_BITUL(5)
+#define MACHINE_FLAG_DIAG9C	_BITUL(6)
+#define MACHINE_FLAG_KVM	_BITUL(8)
+#define MACHINE_FLAG_ESOP	_BITUL(9)
+#define MACHINE_FLAG_EDAT1	_BITUL(10)
+#define MACHINE_FLAG_EDAT2	_BITUL(11)
+#define MACHINE_FLAG_LPAR	_BITUL(12)
+#define MACHINE_FLAG_LPP	_BITUL(13)
+#define MACHINE_FLAG_TOPOLOGY	_BITUL(14)
+#define MACHINE_FLAG_TE		_BITUL(15)
+#define MACHINE_FLAG_TLB_LC	_BITUL(17)
+#define MACHINE_FLAG_VX		_BITUL(18)
+#define MACHINE_FLAG_CAD	_BITUL(19)
+
+#define LPP_MAGIC		_BITUL(31)
+#define LPP_PFAULT_PID_MASK	_AC(0xffffffff, UL)
+
 #ifndef __ASSEMBLY__
 
 #include <asm/lowcore.h>
@@ -28,29 +55,6 @@
 
 extern void detect_memory_memblock(void);
 
-/*
- * Machine features detected in head.S
- */
-
-#define MACHINE_FLAG_VM		(1UL << 0)
-#define MACHINE_FLAG_IEEE	(1UL << 1)
-#define MACHINE_FLAG_CSP	(1UL << 2)
-#define MACHINE_FLAG_MVPG	(1UL << 3)
-#define MACHINE_FLAG_DIAG44	(1UL << 4)
-#define MACHINE_FLAG_IDTE	(1UL << 5)
-#define MACHINE_FLAG_DIAG9C	(1UL << 6)
-#define MACHINE_FLAG_KVM	(1UL << 8)
-#define MACHINE_FLAG_ESOP	(1UL << 9)
-#define MACHINE_FLAG_EDAT1	(1UL << 10)
-#define MACHINE_FLAG_EDAT2	(1UL << 11)
-#define MACHINE_FLAG_LPAR	(1UL << 12)
-#define MACHINE_FLAG_LPP	(1UL << 13)
-#define MACHINE_FLAG_TOPOLOGY	(1UL << 14)
-#define MACHINE_FLAG_TE		(1UL << 15)
-#define MACHINE_FLAG_TLB_LC	(1UL << 17)
-#define MACHINE_FLAG_VX		(1UL << 18)
-#define MACHINE_FLAG_CAD	(1UL << 19)
-
 #define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
 #define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
 #define MACHINE_IS_LPAR		(S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 0e37cd0..63ebf37 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -87,7 +87,6 @@
 {
 	typecheck(unsigned int, lp->lock);
 	asm volatile(
-		__ASM_BARRIER
 		"st	%1,%0\n"
 		: "+Q" (lp->lock)
 		: "d" (0)
@@ -169,7 +168,6 @@
 							\
 	typecheck(unsigned int *, ptr);			\
 	asm volatile(					\
-		"bcr	14,0\n"				\
 		op_string "	%0,%2,%1\n"		\
 		: "=d" (old_val), "+Q" (*ptr)		\
 		: "d" (op_val)				\
@@ -243,7 +241,6 @@
 
 	rw->owner = 0;
 	asm volatile(
-		__ASM_BARRIER
 		"st	%1,%0\n"
 		: "+Q" (rw->lock)
 		: "d" (0)
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index dcadfde..12d45f0 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,7 +8,7 @@
 #define __ASM_SWITCH_TO_H
 
 #include <linux/thread_info.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include <asm/ptrace.h>
 
 extern struct task_struct *__switch_to(void *, void *);
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 4c27ec7..692b924 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -7,6 +7,8 @@
 #ifndef _ASM_THREAD_INFO_H
 #define _ASM_THREAD_INFO_H
 
+#include <linux/const.h>
+
 /*
  * Size of kernel stack for each process
  */
@@ -83,16 +85,16 @@
 #define TIF_BLOCK_STEP		20	/* This task is block stepped */
 #define TIF_UPROBE_SINGLESTEP	21	/* This task is uprobe single stepped */
 
-#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
-#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
-#define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
-#define _TIF_UPROBE		(1<<TIF_UPROBE)
-#define _TIF_31BIT		(1<<TIF_31BIT)
-#define _TIF_SINGLE_STEP	(1<<TIF_SINGLE_STEP)
+#define _TIF_NOTIFY_RESUME	_BITUL(TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		_BITUL(TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	_BITUL(TIF_NEED_RESCHED)
+#define _TIF_SYSCALL_TRACE	_BITUL(TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	_BITUL(TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		_BITUL(TIF_SECCOMP)
+#define _TIF_SYSCALL_TRACEPOINT	_BITUL(TIF_SYSCALL_TRACEPOINT)
+#define _TIF_UPROBE		_BITUL(TIF_UPROBE)
+#define _TIF_31BIT		_BITUL(TIF_31BIT)
+#define _TIF_SINGLE_STEP	_BITUL(TIF_SINGLE_STEP)
 
 #define is_32bit_task()		(test_thread_flag(TIF_31BIT))
 
diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h
new file mode 100644
index 0000000..776f307
--- /dev/null
+++ b/arch/s390/include/asm/trace/diag.h
@@ -0,0 +1,43 @@
+/*
+ * Tracepoint header for s390 diagnose calls
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_DIAG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_DIAG_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE diag
+
+TRACE_EVENT(diagnose,
+	TP_PROTO(unsigned short nr),
+	TP_ARGS(nr),
+	TP_STRUCT__entry(
+		__field(unsigned short, nr)
+	),
+	TP_fast_assign(
+		__entry->nr = nr;
+	),
+	TP_printk("nr=0x%x", __entry->nr)
+);
+
+#ifdef CONFIG_TRACEPOINTS
+void trace_diagnose_norecursion(int diag_nr);
+#else
+static inline void trace_diagnose_norecursion(int diag_nr) { }
+#endif
+
+#endif /* _TRACE_S390_DIAG_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index b756c63..dc167a2 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -66,6 +66,8 @@
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o perf_cpum_sf.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf_events.o
 
+obj-$(CONFIG_TRACEPOINTS)	+= trace.o
+
 # vdso
 obj-y				+= vdso64/
 obj-$(CONFIG_COMPAT)		+= vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 3aeeb1b..9cd248f 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -23,59 +23,64 @@
 
 int main(void)
 {
-	DEFINE(__TASK_thread_info, offsetof(struct task_struct, stack));
-	DEFINE(__TASK_thread, offsetof(struct task_struct, thread));
-	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
+	/* task struct offsets */
+	OFFSET(__TASK_thread_info, task_struct, stack);
+	OFFSET(__TASK_thread, task_struct, thread);
+	OFFSET(__TASK_pid, task_struct, pid);
 	BLANK();
-	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
-	DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
-	DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
-	DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
-	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
-	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
-	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
-	DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
+	/* thread struct offsets */
+	OFFSET(__THREAD_ksp, thread_struct, ksp);
+	OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc);
+	OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs);
+	OFFSET(__THREAD_per_cause, thread_struct, per_event.cause);
+	OFFSET(__THREAD_per_address, thread_struct, per_event.address);
+	OFFSET(__THREAD_per_paid, thread_struct, per_event.paid);
+	OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb);
 	BLANK();
-	DEFINE(__TI_task, offsetof(struct thread_info, task));
-	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
-	DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
-	DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
-	DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
-	DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
-	DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
-	DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
+	/* thread info offsets */
+	OFFSET(__TI_task, thread_info, task);
+	OFFSET(__TI_flags, thread_info, flags);
+	OFFSET(__TI_sysc_table, thread_info, sys_call_table);
+	OFFSET(__TI_cpu, thread_info, cpu);
+	OFFSET(__TI_precount, thread_info, preempt_count);
+	OFFSET(__TI_user_timer, thread_info, user_timer);
+	OFFSET(__TI_system_timer, thread_info, system_timer);
+	OFFSET(__TI_last_break, thread_info, last_break);
 	BLANK();
-	DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
-	DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
-	DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
-	DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
-	DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
-	DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
-	DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
-	DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags));
+	/* pt_regs offsets */
+	OFFSET(__PT_ARGS, pt_regs, args);
+	OFFSET(__PT_PSW, pt_regs, psw);
+	OFFSET(__PT_GPRS, pt_regs, gprs);
+	OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+	OFFSET(__PT_INT_CODE, pt_regs, int_code);
+	OFFSET(__PT_INT_PARM, pt_regs, int_parm);
+	OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long);
+	OFFSET(__PT_FLAGS, pt_regs, flags);
 	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
 	BLANK();
-	DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
-	DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
-	DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
+	/* stack_frame offsets */
+	OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
+	OFFSET(__SF_GPRS, stack_frame, gprs);
+	OFFSET(__SF_EMPTY, stack_frame, empty1);
 	BLANK();
 	/* timeval/timezone offsets for use by vdso */
-	DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count));
-	DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp));
-	DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec));
-	DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
-	DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
-	DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
-	DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
-	DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
-	DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec));
-	DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec));
-	DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
-	DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
-	DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult));
-	DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift));
-	DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base));
-	DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time));
+	OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
+	OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp);
+	OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec);
+	OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec);
+	OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec);
+	OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec);
+	OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec);
+	OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec);
+	OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec);
+	OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec);
+	OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest);
+	OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available);
+	OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult);
+	OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
+	OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
+	OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
+	BLANK();
 	/* constants used by the vdso */
 	DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
 	DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
@@ -86,102 +91,105 @@
 	DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
 	BLANK();
 	/* idle data offsets */
-	DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter));
-	DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit));
-	DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter));
-	DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit));
-	/* lowcore offsets */
-	DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
-	DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
-	DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));
-	DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));
-	DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code));
-	DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
-	DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
-	DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
-	DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
-	DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
-	DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
-	DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
-	DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
-	DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
-	DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
-	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
-	DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
-	DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
-	DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
-	DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
-	DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
-	DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
-	DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
-	DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
-	DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
-	DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
-	DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
-	DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));
-	DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));
-	DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw));
-	DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
-	DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));
-	DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));
-	DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));
-	DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));
-	DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw));
+	OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
+	OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
+	OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
+	OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit);
 	BLANK();
-	DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
-	DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
-	DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
-	DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags));
-	DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
-	DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
-	DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
-	DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
-	DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
-	DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
-	DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
-	DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
-	DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer));
-	DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer));
-	DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock));
-	DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task));
-	DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid));
-	DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info));
-	DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
-	DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
-	DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
-	DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack));
-	DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
-	DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
-	DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
-	DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
-	DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
-	DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
-	DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
-	DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
-	DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
+	/* hardware defined lowcore locations 0x000 - 0x1ff */
+	OFFSET(__LC_EXT_PARAMS, _lowcore, ext_params);
+	OFFSET(__LC_EXT_CPU_ADDR, _lowcore, ext_cpu_addr);
+	OFFSET(__LC_EXT_INT_CODE, _lowcore, ext_int_code);
+	OFFSET(__LC_SVC_ILC, _lowcore, svc_ilc);
+	OFFSET(__LC_SVC_INT_CODE, _lowcore, svc_code);
+	OFFSET(__LC_PGM_ILC, _lowcore, pgm_ilc);
+	OFFSET(__LC_PGM_INT_CODE, _lowcore, pgm_code);
+	OFFSET(__LC_DATA_EXC_CODE, _lowcore, data_exc_code);
+	OFFSET(__LC_MON_CLASS_NR, _lowcore, mon_class_num);
+	OFFSET(__LC_PER_CODE, _lowcore, per_code);
+	OFFSET(__LC_PER_ATMID, _lowcore, per_atmid);
+	OFFSET(__LC_PER_ADDRESS, _lowcore, per_address);
+	OFFSET(__LC_EXC_ACCESS_ID, _lowcore, exc_access_id);
+	OFFSET(__LC_PER_ACCESS_ID, _lowcore, per_access_id);
+	OFFSET(__LC_OP_ACCESS_ID, _lowcore, op_access_id);
+	OFFSET(__LC_AR_MODE_ID, _lowcore, ar_mode_id);
+	OFFSET(__LC_TRANS_EXC_CODE, _lowcore, trans_exc_code);
+	OFFSET(__LC_MON_CODE, _lowcore, monitor_code);
+	OFFSET(__LC_SUBCHANNEL_ID, _lowcore, subchannel_id);
+	OFFSET(__LC_SUBCHANNEL_NR, _lowcore, subchannel_nr);
+	OFFSET(__LC_IO_INT_PARM, _lowcore, io_int_parm);
+	OFFSET(__LC_IO_INT_WORD, _lowcore, io_int_word);
+	OFFSET(__LC_STFL_FAC_LIST, _lowcore, stfl_fac_list);
+	OFFSET(__LC_MCCK_CODE, _lowcore, mcck_interruption_code);
+	OFFSET(__LC_MCCK_FAIL_STOR_ADDR, _lowcore, failing_storage_address);
+	OFFSET(__LC_LAST_BREAK, _lowcore, breaking_event_addr);
+	OFFSET(__LC_RST_OLD_PSW, _lowcore, restart_old_psw);
+	OFFSET(__LC_EXT_OLD_PSW, _lowcore, external_old_psw);
+	OFFSET(__LC_SVC_OLD_PSW, _lowcore, svc_old_psw);
+	OFFSET(__LC_PGM_OLD_PSW, _lowcore, program_old_psw);
+	OFFSET(__LC_MCK_OLD_PSW, _lowcore, mcck_old_psw);
+	OFFSET(__LC_IO_OLD_PSW, _lowcore, io_old_psw);
+	OFFSET(__LC_RST_NEW_PSW, _lowcore, restart_psw);
+	OFFSET(__LC_EXT_NEW_PSW, _lowcore, external_new_psw);
+	OFFSET(__LC_SVC_NEW_PSW, _lowcore, svc_new_psw);
+	OFFSET(__LC_PGM_NEW_PSW, _lowcore, program_new_psw);
+	OFFSET(__LC_MCK_NEW_PSW, _lowcore, mcck_new_psw);
+	OFFSET(__LC_IO_NEW_PSW, _lowcore, io_new_psw);
+	/* software defined lowcore locations 0x200 - 0xdff*/
+	OFFSET(__LC_SAVE_AREA_SYNC, _lowcore, save_area_sync);
+	OFFSET(__LC_SAVE_AREA_ASYNC, _lowcore, save_area_async);
+	OFFSET(__LC_SAVE_AREA_RESTART, _lowcore, save_area_restart);
+	OFFSET(__LC_CPU_FLAGS, _lowcore, cpu_flags);
+	OFFSET(__LC_RETURN_PSW, _lowcore, return_psw);
+	OFFSET(__LC_RETURN_MCCK_PSW, _lowcore, return_mcck_psw);
+	OFFSET(__LC_SYNC_ENTER_TIMER, _lowcore, sync_enter_timer);
+	OFFSET(__LC_ASYNC_ENTER_TIMER, _lowcore, async_enter_timer);
+	OFFSET(__LC_MCCK_ENTER_TIMER, _lowcore, mcck_enter_timer);
+	OFFSET(__LC_EXIT_TIMER, _lowcore, exit_timer);
+	OFFSET(__LC_USER_TIMER, _lowcore, user_timer);
+	OFFSET(__LC_SYSTEM_TIMER, _lowcore, system_timer);
+	OFFSET(__LC_STEAL_TIMER, _lowcore, steal_timer);
+	OFFSET(__LC_LAST_UPDATE_TIMER, _lowcore, last_update_timer);
+	OFFSET(__LC_LAST_UPDATE_CLOCK, _lowcore, last_update_clock);
+	OFFSET(__LC_INT_CLOCK, _lowcore, int_clock);
+	OFFSET(__LC_MCCK_CLOCK, _lowcore, mcck_clock);
+	OFFSET(__LC_CURRENT, _lowcore, current_task);
+	OFFSET(__LC_THREAD_INFO, _lowcore, thread_info);
+	OFFSET(__LC_KERNEL_STACK, _lowcore, kernel_stack);
+	OFFSET(__LC_ASYNC_STACK, _lowcore, async_stack);
+	OFFSET(__LC_PANIC_STACK, _lowcore, panic_stack);
+	OFFSET(__LC_RESTART_STACK, _lowcore, restart_stack);
+	OFFSET(__LC_RESTART_FN, _lowcore, restart_fn);
+	OFFSET(__LC_RESTART_DATA, _lowcore, restart_data);
+	OFFSET(__LC_RESTART_SOURCE, _lowcore, restart_source);
+	OFFSET(__LC_USER_ASCE, _lowcore, user_asce);
+	OFFSET(__LC_LPP, _lowcore, lpp);
+	OFFSET(__LC_CURRENT_PID, _lowcore, current_pid);
+	OFFSET(__LC_PERCPU_OFFSET, _lowcore, percpu_offset);
+	OFFSET(__LC_VDSO_PER_CPU, _lowcore, vdso_per_cpu_data);
+	OFFSET(__LC_MACHINE_FLAGS, _lowcore, machine_flags);
+	OFFSET(__LC_GMAP, _lowcore, gmap);
+	OFFSET(__LC_PASTE, _lowcore, paste);
+	/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
+	OFFSET(__LC_DUMP_REIPL, _lowcore, ipib);
+	/* hardware defined lowcore locations 0x1000 - 0x18ff */
+	OFFSET(__LC_VX_SAVE_AREA_ADDR, _lowcore, vector_save_area_addr);
+	OFFSET(__LC_EXT_PARAMS2, _lowcore, ext_params2);
+	OFFSET(SAVE_AREA_BASE, _lowcore, floating_pt_save_area);
+	OFFSET(__LC_FPREGS_SAVE_AREA, _lowcore, floating_pt_save_area);
+	OFFSET(__LC_GPREGS_SAVE_AREA, _lowcore, gpregs_save_area);
+	OFFSET(__LC_PSW_SAVE_AREA, _lowcore, psw_save_area);
+	OFFSET(__LC_PREFIX_SAVE_AREA, _lowcore, prefixreg_save_area);
+	OFFSET(__LC_FP_CREG_SAVE_AREA, _lowcore, fpt_creg_save_area);
+	OFFSET(__LC_CPU_TIMER_SAVE_AREA, _lowcore, cpu_timer_save_area);
+	OFFSET(__LC_CLOCK_COMP_SAVE_AREA, _lowcore, clock_comp_save_area);
+	OFFSET(__LC_AREGS_SAVE_AREA, _lowcore, access_regs_save_area);
+	OFFSET(__LC_CREGS_SAVE_AREA, _lowcore, cregs_save_area);
+	OFFSET(__LC_PGM_TDB, _lowcore, pgm_tdb);
 	BLANK();
-	DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
-	DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
-	DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area));
-	DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area));
-	DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area));
-	DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
-	DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
-	DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
-	DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
-	DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
-	DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
-	DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
-	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
-	DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
-	DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area));
-	DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
-	DEFINE(__LC_PERCPU_OFFSET, offsetof(struct _lowcore, percpu_offset));
-	DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
-	DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
-	DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
-	DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
-	DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
-	DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
+	/* gmap/sie offsets */
+	OFFSET(__GMAP_ASCE, gmap, asce);
+	OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
+	OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
 	return 0;
 }
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index e0f9d27..66c9441 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -249,7 +249,7 @@
 		return -EFAULT;
 
 	/* Save vector registers to signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -277,7 +277,7 @@
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 
 	/* Restore vector registers from signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -470,8 +470,7 @@
 	 */
 	uc_flags = UC_GPRS_HIGH;
 	if (MACHINE_HAS_VX) {
-		if (is_vx_task(current))
-			uc_flags |= UC_VXRS;
+		uc_flags |= UC_VXRS;
 	} else
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
 			      sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 199ec92..7f76891 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/cpcmd.h>
 #include <asm/io.h>
@@ -70,6 +71,7 @@
 	memcpy(cpcmd_buf, cmd, cmdlen);
 	ASCEBC(cpcmd_buf, cmdlen);
 
+	diag_stat_inc(DIAG_STAT_X008);
 	if (response) {
 		memset(response, 0, rlen);
 		response_len = rlen;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 0c6c01e..171e09b 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -32,16 +32,6 @@
 	.regions = &oldmem_region,
 };
 
-#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid)		\
-	for (i = 0, __next_mem_range(&i, nid, MEMBLOCK_NONE,		\
-				     &memblock.physmem,			\
-				     &oldmem_type, p_start,		\
-				     p_end, p_nid);			\
-	     i != (u64)ULLONG_MAX;					\
-	     __next_mem_range(&i, nid, MEMBLOCK_NONE, &memblock.physmem,\
-			      &oldmem_type,				\
-			      p_start, p_end, p_nid))
-
 struct dump_save_areas dump_save_areas;
 
 /*
@@ -515,7 +505,8 @@
 	int cnt = 0;
 	u64 idx;
 
-	for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL)
+	for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
+			   MEMBLOCK_NONE, NULL, NULL, NULL)
 		cnt++;
 	return cnt;
 }
@@ -528,7 +519,8 @@
 	phys_addr_t start, end;
 	u64 idx;
 
-	for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) {
+	for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
+			   MEMBLOCK_NONE, &start, &end, NULL) {
 		phdr->p_filesz = end - start;
 		phdr->p_type = PT_LOAD;
 		phdr->p_offset = start;
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 2f69243..f98766e 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -6,12 +6,137 @@
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 #include <asm/diag.h>
+#include <asm/trace/diag.h>
+
+struct diag_stat {
+	unsigned int counter[NR_DIAG_STAT];
+};
+
+static DEFINE_PER_CPU(struct diag_stat, diag_stat);
+
+struct diag_desc {
+	int code;
+	char *name;
+};
+
+static const struct diag_desc diag_map[NR_DIAG_STAT] = {
+	[DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" },
+	[DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" },
+	[DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" },
+	[DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
+	[DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
+	[DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+	[DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
+	[DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
+	[DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
+	[DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" },
+	[DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" },
+	[DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" },
+	[DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" },
+	[DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" },
+	[DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" },
+	[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
+	[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
+	[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+	[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
+};
+
+static int show_diag_stat(struct seq_file *m, void *v)
+{
+	struct diag_stat *stat;
+	unsigned long n = (unsigned long) v - 1;
+	int cpu, prec, tmp;
+
+	get_online_cpus();
+	if (n == 0) {
+		seq_puts(m, "         ");
+
+		for_each_online_cpu(cpu) {
+			prec = 10;
+			for (tmp = 10; cpu >= tmp; tmp *= 10)
+				prec--;
+			seq_printf(m, "%*s%d", prec, "CPU", cpu);
+		}
+		seq_putc(m, '\n');
+	} else if (n <= NR_DIAG_STAT) {
+		seq_printf(m, "diag %03x:", diag_map[n-1].code);
+		for_each_online_cpu(cpu) {
+			stat = &per_cpu(diag_stat, cpu);
+			seq_printf(m, " %10u", stat->counter[n-1]);
+		}
+		seq_printf(m, "    %s\n", diag_map[n-1].name);
+	}
+	put_online_cpus();
+	return 0;
+}
+
+static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return show_diag_stat_start(m, pos);
+}
+
+static void show_diag_stat_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations show_diag_stat_sops = {
+	.start	= show_diag_stat_start,
+	.next	= show_diag_stat_next,
+	.stop	= show_diag_stat_stop,
+	.show	= show_diag_stat,
+};
+
+static int show_diag_stat_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &show_diag_stat_sops);
+}
+
+static const struct file_operations show_diag_stat_fops = {
+	.open		= show_diag_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+
+static int __init show_diag_stat_init(void)
+{
+	debugfs_create_file("diag_stat", 0400, NULL, NULL,
+			    &show_diag_stat_fops);
+	return 0;
+}
+
+device_initcall(show_diag_stat_init);
+
+void diag_stat_inc(enum diag_stat_enum nr)
+{
+	this_cpu_inc(diag_stat.counter[nr]);
+	trace_diagnose(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc);
+
+void diag_stat_inc_norecursion(enum diag_stat_enum nr)
+{
+	this_cpu_inc(diag_stat.counter[nr]);
+	trace_diagnose_norecursion(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc_norecursion);
 
 /*
  * Diagnose 14: Input spool file manipulation
  */
-int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+static inline int __diag14(unsigned long rx, unsigned long ry1,
+			   unsigned long subcode)
 {
 	register unsigned long _ry1 asm("2") = ry1;
 	register unsigned long _ry2 asm("3") = subcode;
@@ -29,6 +154,12 @@
 
 	return rc;
 }
+
+int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+{
+	diag_stat_inc(DIAG_STAT_X014);
+	return __diag14(rx, ry1, subcode);
+}
 EXPORT_SYMBOL(diag14);
 
 /*
@@ -48,6 +179,7 @@
 	spin_lock_irqsave(&diag210_lock, flags);
 	diag210_tmp = *addr;
 
+	diag_stat_inc(DIAG_STAT_X210);
 	asm volatile(
 		"	lhi	%0,-1\n"
 		"	sam31\n"
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 549a73a..3c31609 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -17,6 +17,7 @@
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/ipl.h>
 #include <asm/lowcore.h>
@@ -286,6 +287,7 @@
 	int rc;
 
 	cpu_address = stap();
+	diag_stat_inc(DIAG_STAT_X09C);
 	asm volatile(
 		"	diag	%2,0,0x9c\n"
 		"0:	la	%0,0\n"
@@ -300,6 +302,7 @@
 {
 	int rc;
 
+	diag_stat_inc(DIAG_STAT_X044);
 	asm volatile(
 		"	diag	0,0,0x44\n"
 		"0:	la	%0,0\n"
@@ -326,10 +329,20 @@
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
 	if (test_facility(51))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
-	if (test_facility(129))
+	if (test_facility(129)) {
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
+		__ctl_set_bit(0, 17);
+	}
 }
 
+static int __init disable_vector_extension(char *str)
+{
+	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+	__ctl_clear_bit(0, 17);
+	return 1;
+}
+early_param("novx", disable_vector_extension);
+
 static int __init cad_setup(char *str)
 {
 	int val;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 582fe44..857b652 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -20,8 +20,9 @@
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/irq.h>
-#include <asm/fpu-internal.h>
 #include <asm/vx-insn.h>
+#include <asm/setup.h>
+#include <asm/nmi.h>
 
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 8
@@ -139,6 +140,28 @@
 #endif
 	.endm
 
+	/*
+	 * The TSTMSK macro generates a test-under-mask instruction by
+	 * calculating the memory offset for the specified mask value.
+	 * Mask value can be any constant.  The macro shifts the mask
+	 * value to calculate the memory offset for the test-under-mask
+	 * instruction.
+	 */
+	.macro TSTMSK addr, mask, size=8, bytepos=0
+		.if (\bytepos < \size) && (\mask >> 8)
+			.if (\mask & 0xff)
+				.error "Mask exceeds byte boundary"
+			.endif
+			TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)"
+			.exitm
+		.endif
+		.ifeq \mask
+			.error "Mask must not be zero"
+		.endif
+		off = \size - \bytepos - 1
+		tm	off+\addr, \mask
+	.endm
+
 	.section .kprobes.text, "ax"
 
 /*
@@ -164,8 +187,11 @@
 	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
 	lg	%r15,__THREAD_ksp(%r1)		# load kernel stack of next
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
-	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
+	mvc	__LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
+	bzr	%r14
+	.insn	s,0xb2800000,__LC_LPP		# set program parameter
 	br	%r14
 
 .L__critical_start:
@@ -180,8 +206,8 @@
 	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
 	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
 	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
+	xc	__SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU		# load guest fp/vx registers ?
 	jno	.Lsie_load_guest_gprs
 	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
 .Lsie_load_guest_gprs:
@@ -195,16 +221,9 @@
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
 	tm	__SIE_PROG20+3(%r14),3		# last exit...
 	jnz	.Lsie_skip
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lsie_skip			# exit if fp/vx regs changed
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.Lsie_enter
-	.insn	s,0xb2800000,__LC_CURRENT_PID	# set guest id to pid
-.Lsie_enter:
 	sie	0(%r14)
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.Lsie_skip
-	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
 .Lsie_skip:
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
@@ -221,11 +240,11 @@
 	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
 	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
+	lg	%r2,__SF_EMPTY+16(%r15)		# return exit reason code
 	br	%r14
 .Lsie_fault:
 	lghi	%r14,-EFAULT
-	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	stg	%r14,__SF_EMPTY+16(%r15)	# set exit reason code
 	j	sie_exit
 
 	EX_TABLE(.Lrewind_pad,.Lsie_fault)
@@ -271,7 +290,7 @@
 	stg	%r2,__PT_ORIG_GPR2(%r11)
 	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
 	lgf	%r9,0(%r8,%r10)			# get system call add.
-	tm	__TI_flags+7(%r12),_TIF_TRACE
+	TSTMSK	__TI_flags(%r12),_TIF_TRACE
 	jnz	.Lsysc_tracesys
 	basr	%r14,%r9			# call sys_xxxx
 	stg	%r2,__PT_R2(%r11)		# store return value
@@ -279,11 +298,11 @@
 .Lsysc_return:
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
-	tm	__PT_FLAGS+7(%r11),_PIF_WORK
+	TSTMSK	__PT_FLAGS(%r11),_PIF_WORK
 	jnz	.Lsysc_work
-	tm	__TI_flags+7(%r12),_TIF_WORK
+	TSTMSK	__TI_flags(%r12),_TIF_WORK
 	jnz	.Lsysc_work			# check for work
-	tm	__LC_CPU_FLAGS+7,_CIF_WORK
+	TSTMSK	__LC_CPU_FLAGS,_CIF_WORK
 	jnz	.Lsysc_work
 .Lsysc_restore:
 	lg	%r14,__LC_VDSO_PER_CPU
@@ -299,23 +318,23 @@
 # One of the work bits is on. Find out which one.
 #
 .Lsysc_work:
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
 	jo	.Lsysc_mcck_pending
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jo	.Lsysc_reschedule
 #ifdef CONFIG_UPROBES
-	tm	__TI_flags+7(%r12),_TIF_UPROBE
+	TSTMSK	__TI_flags(%r12),_TIF_UPROBE
 	jo	.Lsysc_uprobe_notify
 #endif
-	tm	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
+	TSTMSK	__PT_FLAGS(%r11),_PIF_PER_TRAP
 	jo	.Lsysc_singlestep
-	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
+	TSTMSK	__TI_flags(%r12),_TIF_SIGPENDING
 	jo	.Lsysc_sigpending
-	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+	TSTMSK	__TI_flags(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lsysc_notify_resume
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lsysc_vxrs
-	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
+	TSTMSK	__LC_CPU_FLAGS,_CIF_ASCE
 	jo	.Lsysc_uaccess
 	j	.Lsysc_return		# beware of critical section cleanup
 
@@ -354,7 +373,7 @@
 .Lsysc_sigpending:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_signal
-	tm	__PT_FLAGS+7(%r11),_PIF_SYSCALL
+	TSTMSK	__PT_FLAGS(%r11),_PIF_SYSCALL
 	jno	.Lsysc_return
 	lmg	%r2,%r7,__PT_R2(%r11)	# load svc arguments
 	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
@@ -414,7 +433,7 @@
 	basr	%r14,%r9		# call sys_xxx
 	stg	%r2,__PT_R2(%r11)	# store return value
 .Lsysc_tracenogo:
-	tm	__TI_flags+7(%r12),_TIF_TRACE
+	TSTMSK	__TI_flags(%r12),_TIF_TRACE
 	jz	.Lsysc_return
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	larl	%r14,.Lsysc_return
@@ -544,6 +563,8 @@
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	TSTMSK	__LC_CPU_FLAGS,_CIF_IGNORE_IRQ
+	jo	.Lio_restore
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 .Lio_loop:
@@ -554,7 +575,7 @@
 	lghi	%r3,THIN_INTERRUPT
 .Lio_call:
 	brasl	%r14,do_IRQ
-	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR
 	jz	.Lio_return
 	tpi	0
 	jz	.Lio_return
@@ -564,9 +585,9 @@
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
 .Lio_tif:
-	tm	__TI_flags+7(%r12),_TIF_WORK
+	TSTMSK	__TI_flags(%r12),_TIF_WORK
 	jnz	.Lio_work		# there is work to do (signals etc.)
-	tm	__LC_CPU_FLAGS+7,_CIF_WORK
+	TSTMSK	__LC_CPU_FLAGS,_CIF_WORK
 	jnz	.Lio_work
 .Lio_restore:
 	lg	%r14,__LC_VDSO_PER_CPU
@@ -594,7 +615,7 @@
 	# check for preemptive scheduling
 	icm	%r0,15,__TI_precount(%r12)
 	jnz	.Lio_restore		# preemption is disabled
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jno	.Lio_restore
 	# switch to kernel stack
 	lg	%r1,__PT_R15(%r11)
@@ -626,17 +647,17 @@
 # One of the work bits is on. Find out which one.
 #
 .Lio_work_tif:
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
 	jo	.Lio_mcck_pending
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jo	.Lio_reschedule
-	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
+	TSTMSK	__TI_flags(%r12),_TIF_SIGPENDING
 	jo	.Lio_sigpending
-	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+	TSTMSK	__TI_flags(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lio_notify_resume
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lio_vxrs
-	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
+	TSTMSK	__LC_CPU_FLAGS,_CIF_ASCE
 	jo	.Lio_uaccess
 	j	.Lio_return		# beware of critical section cleanup
 
@@ -719,6 +740,8 @@
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
 	mvc	__PT_INT_PARM_LONG(8,%r11),0(%r1)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	TSTMSK	__LC_CPU_FLAGS,_CIF_IGNORE_IRQ
+	jo	.Lio_restore
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
@@ -748,27 +771,22 @@
 	br	%r14
 .Lpsw_idle_end:
 
-/* Store floating-point controls and floating-point or vector extension
- * registers instead.  A critical section cleanup assures that the registers
- * are stored even if interrupted for some other work.	The register %r2
- * designates a struct fpu to store register contents.	If the specified
- * structure does not contain a register save area, the register store is
- * omitted (see also comments in arch_dup_task_struct()).
- *
- * The CIF_FPU flag is set in any case.  The CIF_FPU triggers a lazy restore
- * of the register contents at system call or io return.
+/*
+ * Store floating-point controls and floating-point or vector register
+ * depending whether the vector facility is available.	A critical section
+ * cleanup assures that the registers are stored even if interrupted for
+ * some other work.  The CIF_FPU flag is set to trigger a lazy restore
+ * of the register contents at return from io or a system call.
  */
 ENTRY(save_fpu_regs)
 	lg	%r2,__LC_CURRENT
 	aghi	%r2,__TASK_thread
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	bor	%r14
 	stfpc	__THREAD_FPU_fpc(%r2)
 .Lsave_fpu_regs_fpc_end:
 	lg	%r3,__THREAD_FPU_regs(%r2)
-	ltgr	%r3,%r3
-	jz	.Lsave_fpu_regs_done	  # no save area -> set CIF_FPU
-	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
 	jz	.Lsave_fpu_regs_fp	  # no -> store FP regs
 .Lsave_fpu_regs_vx_low:
 	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
@@ -797,41 +815,30 @@
 	br	%r14
 .Lsave_fpu_regs_end:
 
-/* Load floating-point controls and floating-point or vector extension
- * registers.  A critical section cleanup assures that the register contents
- * are loaded even if interrupted for some other work.	Depending on the saved
- * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
+/*
+ * Load floating-point controls and floating-point or vector registers.
+ * A critical section cleanup assures that the register contents are
+ * loaded even if interrupted for some other work.
  *
  * There are special calling conventions to fit into sysc and io return work:
  *	%r15:	<kernel stack>
  * The function requires:
- *	%r4 and __SF_EMPTY+32(%r15)
+ *	%r4
  */
 load_fpu_regs:
 	lg	%r4,__LC_CURRENT
 	aghi	%r4,__TASK_thread
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	bnor	%r14
 	lfpc	__THREAD_FPU_fpc(%r4)
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
 	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
-	jz	.Lload_fpu_regs_fp_ctl		# -> no VX, load FP regs
-.Lload_fpu_regs_vx_ctl:
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jo	.Lload_fpu_regs_vx
-	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+	jz	.Lload_fpu_regs_fp		# -> no VX, load FP regs
 .Lload_fpu_regs_vx:
 	VLM	%v0,%v15,0,%r4
 .Lload_fpu_regs_vx_high:
 	VLM	%v16,%v31,256,%r4
 	j	.Lload_fpu_regs_done
-.Lload_fpu_regs_fp_ctl:
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jz	.Lload_fpu_regs_fp
-	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
 .Lload_fpu_regs_fp:
 	ld	0,0(%r4)
 	ld	1,8(%r4)
@@ -854,16 +861,6 @@
 	br	%r14
 .Lload_fpu_regs_end:
 
-/* Test and set the vector enablement control in CR0.46 */
-ENTRY(__ctl_set_vx)
-	stctg	%c0,%c0,__SF_EMPTY(%r15)
-	tm	__SF_EMPTY+5(%r15),2
-	bor	%r14
-	oi	__SF_EMPTY+5(%r15),2
-	lctlg	%c0,%c0,__SF_EMPTY(%r15)
-	br	%r14
-.L__ctl_set_vx_end:
-
 .L__critical_end:
 
 /*
@@ -878,11 +875,11 @@
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	tm	__LC_MCCK_CODE,0x80	# system damage?
+	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
 	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
-	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
+	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
 	jo	3f
 	la	%r14,__LC_SYNC_ENTER_TIMER
 	clc	0(8,%r14),__LC_ASYNC_ENTER_TIMER
@@ -896,7 +893,7 @@
 	la	%r14,__LC_LAST_UPDATE_TIMER
 2:	spt	0(%r14)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
-3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
+3:	TSTMSK	__LC_MCCK_CODE,(MCCK_CODE_PSW_MWP_VALID|MCCK_CODE_PSW_IA_VALID)
 	jno	.Lmcck_panic		# no -> skip cleanup critical
 	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
 .Lmcck_skip:
@@ -916,7 +913,7 @@
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
 	lgr	%r15,%r1
 	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
 	jno	.Lmcck_return
 	TRACE_IRQS_OFF
 	brasl	%r14,s390_handle_mcck
@@ -941,7 +938,10 @@
 # PSW restart interrupt handler
 #
 ENTRY(restart_int_handler)
-	stg	%r15,__LC_SAVE_AREA_RESTART
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
+	jz	0f
+	.insn	s,0xb2800000,__LC_LPP
+0:	stg	%r15,__LC_SAVE_AREA_RESTART
 	lg	%r15,__LC_RESTART_STACK
 	aghi	%r15,-__PT_SIZE			# create pt_regs on stack
 	xc	0(__PT_SIZE,%r15),0(%r15)
@@ -1019,10 +1019,6 @@
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
 	jl	.Lcleanup_load_fpu_regs
-	clg	%r9,BASED(.Lcleanup_table+112)	# __ctl_set_vx
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+120)	# .L__ctl_set_vx_end
-	jl	.Lcleanup___ctl_set_vx
 0:	br	%r14
 
 	.align	8
@@ -1041,8 +1037,6 @@
 	.quad	.Lsave_fpu_regs_end
 	.quad	load_fpu_regs
 	.quad	.Lload_fpu_regs_end
-	.quad	__ctl_set_vx
-	.quad	.L__ctl_set_vx_end
 
 #if IS_ENABLED(CONFIG_KVM)
 .Lcleanup_table_sie:
@@ -1051,10 +1045,7 @@
 
 .Lcleanup_sie:
 	lg	%r9,__SF_EMPTY(%r15)		# get control block pointer
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	0f
-	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
-0:	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
+	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
 	br	%r14
@@ -1206,7 +1197,7 @@
 	.quad	.Lpsw_idle_lpsw
 
 .Lcleanup_save_fpu_regs:
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	bor	%r14
 	clg	%r9,BASED(.Lcleanup_save_fpu_regs_done)
 	jhe	5f
@@ -1224,9 +1215,7 @@
 	stfpc	__THREAD_FPU_fpc(%r2)
 1:	# Load register save area and check if VX is active
 	lg	%r3,__THREAD_FPU_regs(%r2)
-	ltgr	%r3,%r3
-	jz	5f			  # no save area -> set CIF_FPU
-	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
 	jz	4f			  # no VX -> store FP regs
 2:	# Store vector registers (V0-V15)
 	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
@@ -1266,43 +1255,27 @@
 	.quad	.Lsave_fpu_regs_done
 
 .Lcleanup_load_fpu_regs:
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	bnor	%r14
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_done)
 	jhe	1f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
 	jhe	2f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
-	jhe	3f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
-	jhe	4f
+	jhe	3f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
-	jhe	5f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
-	jhe	6f
+	jhe	4f
 	lg	%r4,__LC_CURRENT
 	aghi	%r4,__TASK_thread
 	lfpc	__THREAD_FPU_fpc(%r4)
-	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
 	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
-	jz	3f				# -> no VX, load FP regs
-6:	# Set VX-enablement control
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jo	5f
-	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
-5:	# Load V0 ..V15 registers
+	jz	2f				# -> no VX, load FP regs
+4:	# Load V0 ..V15 registers
 	VLM	%v0,%v15,0,%r4
-4:	# Load V16..V31 registers
+3:	# Load V16..V31 registers
 	VLM	%v16,%v31,256,%r4
 	j	1f
-3:	# Clear VX-enablement control for FP
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jz	2f
-	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
 2:	# Load floating-point registers
 	ld	0,0(%r4)
 	ld	1,8(%r4)
@@ -1324,28 +1297,15 @@
 	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
 	lg	%r9,48(%r11)		# return from load_fpu_regs
 	br	%r14
-.Lcleanup_load_fpu_regs_vx_ctl:
-	.quad	.Lload_fpu_regs_vx_ctl
 .Lcleanup_load_fpu_regs_vx:
 	.quad	.Lload_fpu_regs_vx
 .Lcleanup_load_fpu_regs_vx_high:
 	.quad	.Lload_fpu_regs_vx_high
-.Lcleanup_load_fpu_regs_fp_ctl:
-	.quad	.Lload_fpu_regs_fp_ctl
 .Lcleanup_load_fpu_regs_fp:
 	.quad	.Lload_fpu_regs_fp
 .Lcleanup_load_fpu_regs_done:
 	.quad	.Lload_fpu_regs_done
 
-.Lcleanup___ctl_set_vx:
-	stctg	%c0,%c0,__SF_EMPTY(%r15)
-	tm	__SF_EMPTY+5(%r15),2
-	bor	%r14
-	oi	__SF_EMPTY+5(%r15),2
-	lctlg	%c0,%c0,__SF_EMPTY(%r15)
-	lg	%r9,48(%r11)		# return from __ctl_set_vx
-	br	%r14
-
 /*
  * Integer constants
  */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 834df04..b7019ab 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -16,13 +16,10 @@
 void mcck_int_handler(void);
 void restart_int_handler(void);
 void restart_call_handler(void);
-void psw_idle(struct s390_idle_data *, unsigned long);
 
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
 
-int alloc_vector_registers(struct task_struct *tsk);
-
 void do_protection_exception(struct pt_regs *regs);
 void do_dat_exception(struct pt_regs *regs);
 
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index d7c0050..58b719f 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -16,7 +16,12 @@
 
 __HEAD
 ENTRY(startup_continue)
-	larl	%r1,sched_clock_base_cc
+	tm	__LC_STFL_FAC_LIST+6,0x80	# LPP available ?
+	jz	0f
+	xc	__LC_LPP+1(7,0),__LC_LPP+1	# clear lpp and current_pid
+	mvi	__LC_LPP,0x80			#   and set LPP_MAGIC
+	.insn	s,0xb2800000,__LC_LPP		# load program parameter
+0:	larl	%r1,sched_clock_base_cc
 	mvc	0(8,%r1),__LC_LAST_UPDATE_CLOCK
 	larl	%r13,.LPG1		# get base
 	lctlg	%c0,%c15,.Lctl-.LPG1(%r13)	# load control registers
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 52fbef9..f6d8acd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -17,6 +17,7 @@
 #include <linux/gfp.h>
 #include <linux/crash_dump.h>
 #include <linux/debug_locks.h>
+#include <asm/diag.h>
 #include <asm/ipl.h>
 #include <asm/smp.h>
 #include <asm/setup.h>
@@ -165,7 +166,7 @@
 
 static struct sclp_ipl_info sclp_ipl_info;
 
-int diag308(unsigned long subcode, void *addr)
+static inline int __diag308(unsigned long subcode, void *addr)
 {
 	register unsigned long _addr asm("0") = (unsigned long) addr;
 	register unsigned long _rc asm("1") = 0;
@@ -178,6 +179,12 @@
 		: "d" (subcode) : "cc", "memory");
 	return _rc;
 }
+
+int diag308(unsigned long subcode, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X308);
+	return __diag308(subcode, addr);
+}
 EXPORT_SYMBOL_GPL(diag308);
 
 /* SYSFS */
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index e9d9add..f41d520 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -69,7 +69,6 @@
 	{.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
 	{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
 	{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
-	{.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
 	{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
 	{.irq = IRQIO_CIO,  .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
 	{.irq = IRQIO_QAI,  .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0ae6f8e..07302ce 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -21,19 +21,20 @@
 #include <asm/nmi.h>
 #include <asm/crw.h>
 #include <asm/switch_to.h>
-#include <asm/fpu-internal.h>
 #include <asm/ctl_reg.h>
 
 struct mcck_struct {
-	int kill_task;
-	int channel_report;
-	int warning;
-	unsigned long long mcck_code;
+	unsigned int kill_task : 1;
+	unsigned int channel_report : 1;
+	unsigned int warning : 1;
+	unsigned int etr_queue : 1;
+	unsigned int stp_queue : 1;
+	unsigned long mcck_code;
 };
 
 static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
 
-static void s390_handle_damage(char *msg)
+static void s390_handle_damage(void)
 {
 	smp_send_stop();
 	disabled_wait((unsigned long) __builtin_return_address(0));
@@ -81,10 +82,14 @@
 		if (xchg(&mchchk_wng_posted, 1) == 0)
 			kill_cad_pid(SIGPWR, 1);
 	}
+	if (mcck.etr_queue)
+		etr_queue_work();
+	if (mcck.stp_queue)
+		stp_queue_work();
 	if (mcck.kill_task) {
 		local_irq_enable();
 		printk(KERN_EMERG "mcck: Terminating task because of machine "
-		       "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+		       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
 		printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
 		       current->comm, current->pid);
 		do_exit(SIGSEGV);
@@ -96,7 +101,7 @@
  * returns 0 if all registers could be validated
  * returns 1 otherwise
  */
-static int notrace s390_revalidate_registers(struct mci *mci)
+static int notrace s390_validate_registers(union mci mci)
 {
 	int kill_task;
 	u64 zero;
@@ -105,14 +110,14 @@
 	kill_task = 0;
 	zero = 0;
 
-	if (!mci->gr) {
+	if (!mci.gr) {
 		/*
 		 * General purpose registers couldn't be restored and have
 		 * unknown contents. Process needs to be terminated.
 		 */
 		kill_task = 1;
 	}
-	if (!mci->fp) {
+	if (!mci.fp) {
 		/*
 		 * Floating point registers can't be restored and
 		 * therefore the process needs to be terminated.
@@ -121,7 +126,7 @@
 	}
 	fpt_save_area = &S390_lowcore.floating_pt_save_area;
 	fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
-	if (!mci->fc) {
+	if (!mci.fc) {
 		/*
 		 * Floating point control register can't be restored.
 		 * Task will be terminated.
@@ -132,7 +137,7 @@
 		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 
 	if (!MACHINE_HAS_VX) {
-		/* Revalidate floating point registers */
+		/* Validate floating point registers */
 		asm volatile(
 			"	ld	0,0(%0)\n"
 			"	ld	1,8(%0)\n"
@@ -152,10 +157,10 @@
 			"	ld	15,120(%0)\n"
 			: : "a" (fpt_save_area));
 	} else {
-		/* Revalidate vector registers */
+		/* Validate vector registers */
 		union ctlreg0 cr0;
 
-		if (!mci->vr) {
+		if (!mci.vr) {
 			/*
 			 * Vector registers can't be restored and therefore
 			 * the process needs to be terminated.
@@ -173,38 +178,38 @@
 				 &S390_lowcore.vector_save_area) : "1");
 		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
-	/* Revalidate access registers */
+	/* Validate access registers */
 	asm volatile(
 		"	lam	0,15,0(%0)"
 		: : "a" (&S390_lowcore.access_regs_save_area));
-	if (!mci->ar) {
+	if (!mci.ar) {
 		/*
 		 * Access registers have unknown contents.
 		 * Terminating task.
 		 */
 		kill_task = 1;
 	}
-	/* Revalidate control registers */
-	if (!mci->cr) {
+	/* Validate control registers */
+	if (!mci.cr) {
 		/*
 		 * Control registers have unknown contents.
 		 * Can't recover and therefore stopping machine.
 		 */
-		s390_handle_damage("invalid control registers.");
+		s390_handle_damage();
 	} else {
 		asm volatile(
 			"	lctlg	0,15,0(%0)"
 			: : "a" (&S390_lowcore.cregs_save_area));
 	}
 	/*
-	 * We don't even try to revalidate the TOD register, since we simply
+	 * We don't even try to validate the TOD register, since we simply
 	 * can't write something sensible into that register.
 	 */
 	/*
-	 * See if we can revalidate the TOD programmable register with its
+	 * See if we can validate the TOD programmable register with its
 	 * old contents (should be zero) otherwise set it to zero.
 	 */
-	if (!mci->pr)
+	if (!mci.pr)
 		asm volatile(
 			"	sr	0,0\n"
 			"	sckpf"
@@ -215,17 +220,17 @@
 			"	sckpf"
 			: : "a" (&S390_lowcore.tod_progreg_save_area)
 			: "0", "cc");
-	/* Revalidate clock comparator register */
+	/* Validate clock comparator register */
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	/* Check if old PSW is valid */
-	if (!mci->wp)
+	if (!mci.wp)
 		/*
 		 * Can't tell if we come from user or kernel mode
 		 * -> stopping machine.
 		 */
-		s390_handle_damage("old psw invalid.");
+		s390_handle_damage();
 
-	if (!mci->ms || !mci->pm || !mci->ia)
+	if (!mci.ms || !mci.pm || !mci.ia)
 		kill_task = 1;
 
 	return kill_task;
@@ -249,21 +254,21 @@
 	static unsigned long long last_ipd;
 	struct mcck_struct *mcck;
 	unsigned long long tmp;
-	struct mci *mci;
+	union mci mci;
 	int umode;
 
 	nmi_enter();
 	inc_irq_stat(NMI_NMI);
-	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+	mci.val = S390_lowcore.mcck_interruption_code;
 	mcck = this_cpu_ptr(&cpu_mcck);
 	umode = user_mode(regs);
 
-	if (mci->sd) {
+	if (mci.sd) {
 		/* System damage -> stopping machine */
-		s390_handle_damage("received system damage machine check.");
+		s390_handle_damage();
 	}
-	if (mci->pd) {
-		if (mci->b) {
+	if (mci.pd) {
+		if (mci.b) {
 			/* Processing backup -> verify if we can survive this */
 			u64 z_mcic, o_mcic, t_mcic;
 			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
@@ -271,12 +276,11 @@
 				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
 				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
 				  1ULL<<16);
-			t_mcic = *(u64 *)mci;
+			t_mcic = mci.val;
 
 			if (((t_mcic & z_mcic) != 0) ||
 			    ((t_mcic & o_mcic) != o_mcic)) {
-				s390_handle_damage("processing backup machine "
-						   "check with damage.");
+				s390_handle_damage();
 			}
 
 			/*
@@ -291,64 +295,62 @@
 				ipd_count = 1;
 			last_ipd = tmp;
 			if (ipd_count == MAX_IPD_COUNT)
-				s390_handle_damage("too many ipd retries.");
+				s390_handle_damage();
 			spin_unlock(&ipd_lock);
 		} else {
 			/* Processing damage -> stopping machine */
-			s390_handle_damage("received instruction processing "
-					   "damage machine check.");
+			s390_handle_damage();
 		}
 	}
-	if (s390_revalidate_registers(mci)) {
+	if (s390_validate_registers(mci)) {
 		if (umode) {
 			/*
 			 * Couldn't restore all register contents while in
 			 * user mode -> mark task for termination.
 			 */
 			mcck->kill_task = 1;
-			mcck->mcck_code = *(unsigned long long *) mci;
+			mcck->mcck_code = mci.val;
 			set_cpu_flag(CIF_MCCK_PENDING);
 		} else {
 			/*
 			 * Couldn't restore all register contents while in
 			 * kernel mode -> stopping machine.
 			 */
-			s390_handle_damage("unable to revalidate registers.");
+			s390_handle_damage();
 		}
 	}
-	if (mci->cd) {
+	if (mci.cd) {
 		/* Timing facility damage */
-		s390_handle_damage("TOD clock damaged");
+		s390_handle_damage();
 	}
-	if (mci->ed && mci->ec) {
+	if (mci.ed && mci.ec) {
 		/* External damage */
 		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
-			etr_sync_check();
+			mcck->etr_queue |= etr_sync_check();
 		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
-			etr_switch_to_local();
+			mcck->etr_queue |= etr_switch_to_local();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
-			stp_sync_check();
+			mcck->stp_queue |= stp_sync_check();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
-			stp_island_check();
+			mcck->stp_queue |= stp_island_check();
+		if (mcck->etr_queue || mcck->stp_queue)
+			set_cpu_flag(CIF_MCCK_PENDING);
 	}
-	if (mci->se)
+	if (mci.se)
 		/* Storage error uncorrected */
-		s390_handle_damage("received storage error uncorrected "
-				   "machine check.");
-	if (mci->ke)
+		s390_handle_damage();
+	if (mci.ke)
 		/* Storage key-error uncorrected */
-		s390_handle_damage("received storage key-error uncorrected "
-				   "machine check.");
-	if (mci->ds && mci->fa)
+		s390_handle_damage();
+	if (mci.ds && mci.fa)
 		/* Storage degradation */
-		s390_handle_damage("received storage degradation machine "
-				   "check.");
-	if (mci->cp) {
+		s390_handle_damage();
+	if (mci.cp) {
 		/* Channel report word pending */
 		mcck->channel_report = 1;
 		set_cpu_flag(CIF_MCCK_PENDING);
 	}
-	if (mci->w) {
+	if (mci.w) {
 		/* Warning pending */
 		mcck->warning = 1;
 		set_cpu_flag(CIF_MCCK_PENDING);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b973972..3d8da1e 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1019,11 +1019,13 @@
 		break;
 	}
 
-	/* The host-program-parameter (hpp) contains the pid of
-	 * the CPU thread as set by sie64a() in entry.S.
-	 * If non-zero assume a guest sample.
+	/*
+	 * A non-zero guest program parameter indicates a guest
+	 * sample.
+	 * Note that some early samples might be misaccounted to
+	 * the host.
 	 */
-	if (sfr->basic.hpp)
+	if (sfr->basic.gpp)
 		sde_regs->in_guest = 1;
 
 	overflow = 0;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index f2dac9f..688a3aa 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -23,6 +23,7 @@
 #include <linux/kprobes.h>
 #include <linux/random.h>
 #include <linux/module.h>
+#include <linux/init_task.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/vtimer.h>
@@ -36,6 +37,9 @@
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
+/* FPU save area for the init task */
+__vector128 init_task_fpu_regs[__NUM_VXRS] __init_task_data;
+
 /*
  * Return saved PC of a blocked thread. used in kernel/sched.
  * resume in entry.S does not create a new stack frame, it
@@ -87,31 +91,29 @@
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
+	size_t fpu_regs_size;
+
 	*dst = *src;
 
-	/* Set up a new floating-point register save area */
-	dst->thread.fpu.fpc = 0;
-	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
-	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-				       GFP_KERNEL|__GFP_REPEAT);
-	if (!dst->thread.fpu.fprs)
+	/*
+	 * If the vector extension is available, it is enabled for all tasks,
+	 * and, thus, the FPU register save area must be allocated accordingly.
+	 */
+	fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS
+				       : sizeof(freg_t) * __NUM_FPRS;
+	dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT);
+	if (!dst->thread.fpu.regs)
 		return -ENOMEM;
 
 	/*
 	 * Save the floating-point or vector register state of the current
-	 * task.  The state is not saved for early kernel threads, for example,
-	 * the init_task, which do not have an allocated save area.
-	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
-	 * state when switching to a different task or returning to user space.
+	 * task and set the CIF_FPU flag to lazy restore the FPU register
+	 * state when returning to user space.
 	 */
 	save_fpu_regs();
 	dst->thread.fpu.fpc = current->thread.fpu.fpc;
-	if (is_vx_task(current))
-		convert_vx_to_fp(dst->thread.fpu.fprs,
-				 current->thread.fpu.vxrs);
-	else
-		memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
-		       sizeof(freg_t) * __NUM_FPRS);
+	memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size);
+
 	return 0;
 }
 
@@ -169,7 +171,6 @@
 
 	/* Don't copy runtime instrumentation info */
 	p->thread.ri_cb = NULL;
-	p->thread.ri_signum = 0;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
 
 	/* Set a new TLS ?  */
@@ -199,7 +200,7 @@
 	save_fpu_regs();
 	fpregs->fpc = current->thread.fpu.fpc;
 	fpregs->pad = 0;
-	if (is_vx_task(current))
+	if (MACHINE_HAS_VX)
 		convert_vx_to_fp((freg_t *)&fpregs->fprs,
 				 current->thread.fpu.vxrs);
 	else
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index e6e077a..7ce00e7 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -11,6 +11,7 @@
 #include <linux/seq_file.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
+#include <asm/diag.h>
 #include <asm/elf.h>
 #include <asm/lowcore.h>
 #include <asm/param.h>
@@ -20,8 +21,10 @@
 
 void notrace cpu_relax(void)
 {
-	if (!smp_cpu_mtid && MACHINE_HAS_DIAG44)
+	if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
+		diag_stat_inc(DIAG_STAT_X044);
 		asm volatile("diag 0,0,0x44");
+	}
 	barrier();
 }
 EXPORT_SYMBOL(cpu_relax);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 8b1c8e3..01c37b3 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -239,12 +239,12 @@
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			tmp = *(addr_t *)
 			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(addr_t *)
-			       ((addr_t) &child->thread.fpu.fprs + offset);
+			       ((addr_t) child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -383,12 +383,12 @@
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			*(addr_t *)((addr_t)
 				child->thread.fpu.vxrs + 2*offset) = data;
 		else
 			*(addr_t *)((addr_t)
-				&child->thread.fpu.fprs + offset) = data;
+				child->thread.fpu.fprs + offset) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -617,12 +617,12 @@
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			tmp = *(__u32 *)
 			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(__u32 *)
-			       ((addr_t) &child->thread.fpu.fprs + offset);
+			       ((addr_t) child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -742,12 +742,12 @@
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			*(__u32 *)((addr_t)
 				child->thread.fpu.vxrs + 2*offset) = tmp;
 		else
 			*(__u32 *)((addr_t)
-				&child->thread.fpu.fprs + offset) = tmp;
+				child->thread.fpu.fprs + offset) = tmp;
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -981,7 +981,7 @@
 	if (rc)
 		return rc;
 
-	if (is_vx_task(target))
+	if (MACHINE_HAS_VX)
 		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
 	else
 		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
@@ -1047,13 +1047,10 @@
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (is_vx_task(target)) {
-		if (target == current)
-			save_fpu_regs();
-		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
-	} else
-		memset(vxrs, 0, sizeof(vxrs));
+	if (target == current)
+		save_fpu_regs();
+	for (i = 0; i < __NUM_VXRS_LOW; i++)
+		vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 }
 
@@ -1067,11 +1064,7 @@
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!is_vx_task(target)) {
-		rc = alloc_vector_registers(target);
-		if (rc)
-			return rc;
-	} else if (target == current)
+	if (target == current)
 		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
@@ -1091,13 +1084,10 @@
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (is_vx_task(target)) {
-		if (target == current)
-			save_fpu_regs();
-		memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
-		       sizeof(vxrs));
-	} else
-		memset(vxrs, 0, sizeof(vxrs));
+	if (target == current)
+		save_fpu_regs();
+	memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
+
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 }
 
@@ -1110,11 +1100,7 @@
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!is_vx_task(target)) {
-		rc = alloc_vector_registers(target);
-		if (rc)
-			return rc;
-	} else if (target == current)
+	if (target == current)
 		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 26b4ae9..fffa0e5 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -18,11 +18,6 @@
 /* empty control block to disable RI by loading it */
 struct runtime_instr_cb runtime_instr_empty_cb;
 
-static int runtime_instr_avail(void)
-{
-	return test_facility(64);
-}
-
 static void disable_runtime_instr(void)
 {
 	struct pt_regs *regs = task_pt_regs(current);
@@ -40,7 +35,6 @@
 static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
 {
 	cb->buf_limit = 0xfff;
-	cb->int_requested = 1;
 	cb->pstate = 1;
 	cb->pstate_set_buf = 1;
 	cb->pstate_sample = 1;
@@ -57,46 +51,14 @@
 		return;
 	disable_runtime_instr();
 	kfree(task->thread.ri_cb);
-	task->thread.ri_signum = 0;
 	task->thread.ri_cb = NULL;
 }
 
-static void runtime_instr_int_handler(struct ext_code ext_code,
-				unsigned int param32, unsigned long param64)
-{
-	struct siginfo info;
-
-	if (!(param32 & CPU_MF_INT_RI_MASK))
-		return;
-
-	inc_irq_stat(IRQEXT_CMR);
-
-	if (!current->thread.ri_cb)
-		return;
-	if (current->thread.ri_signum < SIGRTMIN ||
-	    current->thread.ri_signum > SIGRTMAX) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-
-	memset(&info, 0, sizeof(info));
-	info.si_signo = current->thread.ri_signum;
-	info.si_code = SI_QUEUE;
-	if (param32 & CPU_MF_INT_RI_BUF_FULL)
-		info.si_int = ENOBUFS;
-	else if (param32 & CPU_MF_INT_RI_HALTED)
-		info.si_int = ECANCELED;
-	else
-		return; /* unknown reason */
-
-	send_sig_info(current->thread.ri_signum, &info, current);
-}
-
-SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+SYSCALL_DEFINE1(s390_runtime_instr, int, command)
 {
 	struct runtime_instr_cb *cb;
 
-	if (!runtime_instr_avail())
+	if (!test_facility(64))
 		return -EOPNOTSUPP;
 
 	if (command == S390_RUNTIME_INSTR_STOP) {
@@ -106,8 +68,7 @@
 		return 0;
 	}
 
-	if (command != S390_RUNTIME_INSTR_START ||
-	    (signum < SIGRTMIN || signum > SIGRTMAX))
+	if (command != S390_RUNTIME_INSTR_START)
 		return -EINVAL;
 
 	if (!current->thread.ri_cb) {
@@ -120,7 +81,6 @@
 	}
 
 	init_runtime_instr_cb(cb);
-	current->thread.ri_signum = signum;
 
 	/* now load the control block to make it available */
 	preempt_disable();
@@ -129,21 +89,3 @@
 	preempt_enable();
 	return 0;
 }
-
-static int __init runtime_instr_init(void)
-{
-	int rc;
-
-	if (!runtime_instr_avail())
-		return 0;
-
-	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
-				   runtime_instr_int_handler);
-	if (rc)
-		irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-	else
-		pr_info("Runtime instrumentation facility initialized\n");
-	return rc;
-}
-device_initcall(runtime_instr_init);
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 5090d3d..e67453b 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -1,6 +1,6 @@
 #include <linux/module.h>
 #include <linux/kvm_host.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -10,7 +10,6 @@
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
 EXPORT_SYMBOL(save_fpu_regs);
-EXPORT_SYMBOL(__ctl_set_vx);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 9549af1..028cc46 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -179,7 +179,7 @@
 	int i;
 
 	/* Save vector registers to signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -199,7 +199,7 @@
 	int i;
 
 	/* Restore vector registers from signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -381,8 +381,7 @@
 	uc_flags = 0;
 	if (MACHINE_HAS_VX) {
 		frame_size += sizeof(_sigregs_ext);
-		if (is_vx_task(current))
-			uc_flags |= UC_VXRS;
+		uc_flags |= UC_VXRS;
 	}
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index c6355e6..9062df5 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -33,6 +33,7 @@
 #include <linux/crash_dump.h>
 #include <linux/memblock.h>
 #include <asm/asm-offsets.h>
+#include <asm/diag.h>
 #include <asm/switch_to.h>
 #include <asm/facility.h>
 #include <asm/ipl.h>
@@ -261,6 +262,8 @@
 		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 	lc->thread_info = (unsigned long) task_thread_info(tsk);
 	lc->current_task = (unsigned long) tsk;
+	lc->lpp = LPP_MAGIC;
+	lc->current_pid = tsk->pid;
 	lc->user_timer = ti->user_timer;
 	lc->system_timer = ti->system_timer;
 	lc->steal_timer = 0;
@@ -375,11 +378,14 @@
 
 void smp_yield_cpu(int cpu)
 {
-	if (MACHINE_HAS_DIAG9C)
+	if (MACHINE_HAS_DIAG9C) {
+		diag_stat_inc_norecursion(DIAG_STAT_X09C);
 		asm volatile("diag %0,0,0x9c"
 			     : : "d" (pcpu_devices[cpu].address));
-	else if (MACHINE_HAS_DIAG44)
+	} else if (MACHINE_HAS_DIAG44) {
+		diag_stat_inc_norecursion(DIAG_STAT_X044);
 		asm volatile("diag 0,0,0x44");
+	}
 }
 
 /*
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 017c3a9..99f84ac31 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -542,16 +542,17 @@
  * Switch to local machine check. This is called when the last usable
  * ETR port goes inactive. After switch to local the clock is not in sync.
  */
-void etr_switch_to_local(void)
+int etr_switch_to_local(void)
 {
 	if (!etr_eacr.sl)
-		return;
+		return 0;
 	disable_sync_clock(NULL);
 	if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
 		etr_eacr.es = etr_eacr.sl = 0;
 		etr_setr(&etr_eacr);
-		queue_work(time_sync_wq, &etr_work);
+		return 1;
 	}
+	return 0;
 }
 
 /*
@@ -560,16 +561,22 @@
  * After a ETR sync check the clock is not in sync. The machine check
  * is broadcasted to all cpus at the same time.
  */
-void etr_sync_check(void)
+int etr_sync_check(void)
 {
 	if (!etr_eacr.es)
-		return;
+		return 0;
 	disable_sync_clock(NULL);
 	if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
 		etr_eacr.es = 0;
 		etr_setr(&etr_eacr);
-		queue_work(time_sync_wq, &etr_work);
+		return 1;
 	}
+	return 0;
+}
+
+void etr_queue_work(void)
+{
+	queue_work(time_sync_wq, &etr_work);
 }
 
 /*
@@ -1504,10 +1511,10 @@
  * After a STP sync check the clock is not in sync. The machine check
  * is broadcasted to all cpus at the same time.
  */
-void stp_sync_check(void)
+int stp_sync_check(void)
 {
 	disable_sync_clock(NULL);
-	queue_work(time_sync_wq, &stp_work);
+	return 1;
 }
 
 /*
@@ -1516,12 +1523,16 @@
  * have matching CTN ids and have a valid stratum-1 configuration
  * but the configurations do not match.
  */
-void stp_island_check(void)
+int stp_island_check(void)
 {
 	disable_sync_clock(NULL);
-	queue_work(time_sync_wq, &stp_work);
+	return 1;
 }
 
+void stp_queue_work(void)
+{
+	queue_work(time_sync_wq, &stp_work);
+}
 
 static int stp_sync_clock(void *data)
 {
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index bf05e7f..40b8102 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -84,6 +84,7 @@
 					  struct mask_info *socket,
 					  int one_socket_per_cpu)
 {
+	struct cpu_topology_s390 *topo;
 	unsigned int core;
 
 	for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
@@ -95,15 +96,16 @@
 		if (lcpu < 0)
 			continue;
 		for (i = 0; i <= smp_cpu_mtid; i++) {
-			per_cpu(cpu_topology, lcpu + i).book_id = book->id;
-			per_cpu(cpu_topology, lcpu + i).core_id = rcore;
-			per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i;
+			topo = &per_cpu(cpu_topology, lcpu + i);
+			topo->book_id = book->id;
+			topo->core_id = rcore;
+			topo->thread_id = lcpu + i;
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
 			if (one_socket_per_cpu)
-				per_cpu(cpu_topology, lcpu + i).socket_id = rcore;
+				topo->socket_id = rcore;
 			else
-				per_cpu(cpu_topology, lcpu + i).socket_id = socket->id;
+				topo->socket_id = socket->id;
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
 		if (one_socket_per_cpu)
@@ -247,17 +249,19 @@
 
 static void update_cpu_masks(void)
 {
+	struct cpu_topology_s390 *topo;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
-		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
-		per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu);
+		topo = &per_cpu(cpu_topology, cpu);
+		topo->thread_mask = cpu_thread_map(cpu);
+		topo->core_mask = cpu_group_map(&socket_info, cpu);
+		topo->book_mask = cpu_group_map(&book_info, cpu);
 		if (!MACHINE_HAS_TOPOLOGY) {
-			per_cpu(cpu_topology, cpu).thread_id = cpu;
-			per_cpu(cpu_topology, cpu).core_id = cpu;
-			per_cpu(cpu_topology, cpu).socket_id = cpu;
-			per_cpu(cpu_topology, cpu).book_id = cpu;
+			topo->thread_id = cpu;
+			topo->core_id = cpu;
+			topo->socket_id = cpu;
+			topo->book_id = cpu;
 		}
 	}
 	numa_update_cpu_topology();
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
new file mode 100644
index 0000000..73239bb
--- /dev/null
+++ b/arch/s390/kernel/trace.c
@@ -0,0 +1,29 @@
+/*
+ * Tracepoint definitions for s390
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/percpu.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace/diag.h>
+
+EXPORT_TRACEPOINT_SYMBOL(diagnose);
+
+static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
+
+void trace_diagnose_norecursion(int diag_nr)
+{
+	unsigned long flags;
+	unsigned int *depth;
+
+	local_irq_save(flags);
+	depth = this_cpu_ptr(&diagnose_trace_depth);
+	if (*depth == 0) {
+		(*depth)++;
+		trace_diagnose(diag_nr);
+		(*depth)--;
+	}
+	local_irq_restore(flags);
+}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 9861613..1b18118 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include "entry.h"
 
 int show_unhandled_signals = 1;
@@ -224,29 +224,6 @@
 DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 	      "specification exception");
 
-int alloc_vector_registers(struct task_struct *tsk)
-{
-	__vector128 *vxrs;
-	freg_t *fprs;
-
-	/* Allocate vector register save area. */
-	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
-		       GFP_KERNEL|__GFP_REPEAT);
-	if (!vxrs)
-		return -ENOMEM;
-	preempt_disable();
-	if (tsk == current)
-		save_fpu_regs();
-	/* Copy the 16 floating point registers */
-	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
-	fprs = tsk->thread.fpu.fprs;
-	tsk->thread.fpu.vxrs = vxrs;
-	tsk->thread.fpu.flags |= FPU_USE_VX;
-	kfree(fprs);
-	preempt_enable();
-	return 0;
-}
-
 void vector_exception(struct pt_regs *regs)
 {
 	int si_code, vic;
@@ -281,13 +258,6 @@
 	do_trap(regs, SIGFPE, si_code, "vector exception");
 }
 
-static int __init disable_vector_extension(char *str)
-{
-	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
-	return 1;
-}
-__setup("novx", disable_vector_extension);
-
 void data_exception(struct pt_regs *regs)
 {
 	__u16 __user *location;
@@ -296,15 +266,6 @@
 	location = get_trap_ip(regs);
 
 	save_fpu_regs();
-	/* Check for vector register enablement */
-	if (MACHINE_HAS_VX && !is_vx_task(current) &&
-	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
-		alloc_vector_registers(current);
-		/* Vector data exception is suppressing, rewind psw. */
-		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
-		clear_pt_regs_flag(regs, PIF_PER_TRAP);
-		return;
-	}
 	if (current->thread.fpu.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 0d58269..59eddb0 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -299,7 +299,7 @@
 
 	get_page(virt_to_page(vdso_data));
 
-	smp_wmb();
+	smp_mb();
 
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0a67c40..c6b4063 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1292,7 +1292,6 @@
 static inline void save_fpu_to(struct fpu *dst)
 {
 	dst->fpc = current->thread.fpu.fpc;
-	dst->flags = current->thread.fpu.flags;
 	dst->regs = current->thread.fpu.regs;
 }
 
@@ -1303,7 +1302,6 @@
 static inline void load_fpu_from(struct fpu *from)
 {
 	current->thread.fpu.fpc = from->fpc;
-	current->thread.fpu.flags = from->flags;
 	current->thread.fpu.regs = from->regs;
 }
 
@@ -1315,15 +1313,12 @@
 
 	if (test_kvm_facility(vcpu->kvm, 129)) {
 		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-		current->thread.fpu.flags = FPU_USE_VX;
 		/*
 		 * Use the register save area in the SIE-control block
 		 * for register restore and save in kvm_arch_vcpu_put()
 		 */
 		current->thread.fpu.vxrs =
 			(__vector128 *)&vcpu->run->s.regs.vrs;
-		/* Always enable the vector extension for KVM */
-		__ctl_set_vx();
 	} else
 		load_fpu_from(&vcpu->arch.guest_fpregs);
 
@@ -2326,7 +2321,6 @@
 		 * registers and the FPC value and store them in the
 		 * guest_fpregs structure.
 		 */
-		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
 		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
 		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
 				 current->thread.fpu.vxrs);
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 246a7eb..501dcd4 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -12,8 +12,10 @@
 #include <linux/module.h>
 #include <linux/irqflags.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <asm/vtimer.h>
 #include <asm/div64.h>
+#include <asm/idle.h>
 
 void __delay(unsigned long loops)
 {
@@ -30,26 +32,22 @@
 
 static void __udelay_disabled(unsigned long long usecs)
 {
-	unsigned long cr0, cr6, new;
-	u64 clock_saved, end;
+	unsigned long cr0, cr0_new, psw_mask;
+	struct s390_idle_data idle;
+	u64 end;
 
 	end = get_tod_clock() + (usecs << 12);
-	clock_saved = local_tick_disable();
 	__ctl_store(cr0, 0, 0);
-	__ctl_store(cr6, 6, 6);
-	new = (cr0 &  0xffff00e0) | 0x00000800;
-	__ctl_load(new , 0, 0);
-	new = 0;
-	__ctl_load(new, 6, 6);
-	lockdep_off();
-	do {
-		set_clock_comparator(end);
-		enabled_wait();
-	} while (get_tod_clock_fast() < end);
-	lockdep_on();
+	cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK;
+	cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */
+	__ctl_load(cr0_new, 0, 0);
+	psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
+	set_clock_comparator(end);
+	set_cpu_flag(CIF_IGNORE_IRQ);
+	psw_idle(&idle, psw_mask);
+	clear_cpu_flag(CIF_IGNORE_IRQ);
+	set_clock_comparator(S390_lowcore.clock_comparator);
 	__ctl_load(cr0, 0, 0);
-	__ctl_load(cr6, 6, 6);
-	local_tick_enable(clock_saved);
 }
 
 static void __udelay_enabled(unsigned long long usecs)
diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c
index 922003c..d90b924 100644
--- a/arch/s390/lib/find.c
+++ b/arch/s390/lib/find.c
@@ -1,10 +1,8 @@
 /*
  * MSB0 numbered special bitops handling.
  *
- * On s390x the bits are numbered:
+ * The bits are numbered:
  *   |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
  *
  * The reason for this bit numbering is the fact that the hardware sets bits
  * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index d6c9991..427aa44 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -197,7 +197,7 @@
 		}
 		old = ACCESS_ONCE(rw->lock);
 		owner = ACCESS_ONCE(rw->owner);
-		smp_rmb();
+		smp_mb();
 		if ((int) old >= 0) {
 			prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
 			old = prev;
@@ -231,7 +231,7 @@
 		    _raw_compare_and_swap(&rw->lock, old, old | 0x80000000))
 			prev = old;
 		else
-			smp_rmb();
+			smp_mb();
 		if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
 			break;
 		if (MACHINE_HAS_CAD)
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 23c4969..18fccc3 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/ctype.h>
 #include <linux/ioport.h>
+#include <asm/diag.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/ebcdic.h>
@@ -112,6 +113,7 @@
 	ry = DCSS_FINDSEGX;
 
 	strcpy(name, "dummy");
+	diag_stat_inc(DIAG_STAT_X064);
 	asm volatile(
 		"	diag	%0,%1,0x64\n"
 		"0:	ipm	%2\n"
@@ -205,6 +207,7 @@
 	ry = (unsigned long) *func;
 
 	/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+	diag_stat_inc(DIAG_STAT_X064);
 	if (*func > DCSS_SEGEXT)
 		asm volatile(
 			"	diag	%0,%1,0x64\n"
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index f985856..ec1a30d 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -30,6 +30,7 @@
 #include <linux/uaccess.h>
 #include <linux/hugetlb.h>
 #include <asm/asm-offsets.h>
+#include <asm/diag.h>
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
@@ -589,7 +590,7 @@
 		.reffcode = 0,
 		.refdwlen = 5,
 		.refversn = 2,
-		.refgaddr = __LC_CURRENT_PID,
+		.refgaddr = __LC_LPP,
 		.refselmk = 1ULL << 48,
 		.refcmpmk = 1ULL << 48,
 		.reserved = __PF_RES_FIELD };
@@ -597,6 +598,7 @@
 
 	if (pfault_disable)
 		return -1;
+	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
 		"	diag	%1,%0,0x258\n"
 		"0:	j	2f\n"
@@ -618,6 +620,7 @@
 
 	if (pfault_disable)
 		return;
+	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
 		"	diag	%0,0,0x258\n"
 		"0:\n"
@@ -646,7 +649,7 @@
 		return;
 	inc_irq_stat(IRQEXT_PFL);
 	/* Get the token (= pid of the affected task). */
-	pid = param64;
+	pid = param64 & LPP_PFAULT_PID_MASK;
 	rcu_read_lock();
 	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
 	if (tsk)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index fb4bf2c..f81096b 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -40,6 +40,7 @@
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+		pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
 	} else
 		pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
 	return pmd;
@@ -78,6 +79,7 @@
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
 	} else
 		pte_val(pte) = _PAGE_INVALID;
 	return pte;
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
index 30b2698a..828d069 100644
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -436,9 +436,15 @@
  */
 static unsigned long emu_setup_size_adjust(unsigned long size)
 {
+	unsigned long size_new;
+
 	size = size ? : CONFIG_EMU_SIZE;
-	size = roundup(size, memory_block_size_bytes());
-	return size;
+	size_new = roundup(size, memory_block_size_bytes());
+	if (size_new == size)
+		return size;
+	pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n",
+		size >> 20, size_new >> 20);
+	return size_new;
 }
 
 /*
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index dcc2634..10ca15d 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -16,11 +16,11 @@
 static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
 {
 	struct {
-		u8 cc;
-		u8 status;
 		u64 req;
 		u64 offset;
-	} data = {cc, status, req, offset};
+		u8 cc;
+		u8 status;
+	} __packed data = {req, offset, cc, status};
 
 	zpci_err_hex(&data, sizeof(data));
 }
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c0b41f11..6ec0c8b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -325,6 +325,16 @@
 	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+	return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
+}
+
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 /*
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index f73d2f5..a263c10 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3030,6 +3030,7 @@
 	} else {
 		max = block->base->discipline->max_blocks << block->s2b_shift;
 	}
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, block->request_queue);
 	blk_queue_logical_block_size(block->request_queue,
 				     block->bp_block);
 	blk_queue_max_hw_sectors(block->request_queue, max);
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index fe07f31..184b1db 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -824,8 +824,11 @@
 		 * were waiting for the flush
 		 */
 		if (device == list_first_entry(&active,
-					       struct dasd_device, alias_list))
+					       struct dasd_device, alias_list)) {
 			list_move(&device->alias_list, &lcu->active_devices);
+			private = (struct dasd_eckd_private *) device->private;
+			private->pavgroup = NULL;
+		}
 	}
 	spin_unlock_irqrestore(&lcu->lock, flags);
 }
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index c062f16..cb61f30 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -21,6 +21,7 @@
 
 #include <asm/dasd.h>
 #include <asm/debug.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -76,6 +77,7 @@
 	int rc;
 
 	rc = 3;
+	diag_stat_inc(DIAG_STAT_X250);
 	asm volatile(
 		"	diag	2,%2,0x250\n"
 		"0:	ipm	%0\n"
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 62a3235..9083247 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1032,6 +1032,21 @@
 		return 0;
 }
 
+static void dasd_eckd_clear_conf_data(struct dasd_device *device)
+{
+	struct dasd_eckd_private *private;
+	int i;
+
+	private = (struct dasd_eckd_private *) device->private;
+	private->conf_data = NULL;
+	private->conf_len = 0;
+	for (i = 0; i < 8; i++) {
+		kfree(private->path_conf_data[i]);
+		private->path_conf_data[i] = NULL;
+	}
+}
+
+
 static int dasd_eckd_read_conf(struct dasd_device *device)
 {
 	void *conf_data;
@@ -1068,20 +1083,10 @@
 			path_data->opm |= lpm;
 			continue;	/* no error */
 		}
-		/* translate path mask to position in mask */
-		pos = 8 - ffs(lpm);
-		kfree(private->path_conf_data[pos]);
-		if ((__u8 *)private->path_conf_data[pos] ==
-		    private->conf_data) {
-			private->conf_data = NULL;
-			private->conf_len = 0;
-			conf_data_saved = 0;
-		}
-		private->path_conf_data[pos] =
-			(struct dasd_conf_data *) conf_data;
 		/* save first valid configuration data */
 		if (!conf_data_saved) {
-			kfree(private->conf_data);
+			/* initially clear previously stored conf_data */
+			dasd_eckd_clear_conf_data(device);
 			private->conf_data = conf_data;
 			private->conf_len = conf_len;
 			if (dasd_eckd_identify_conf_parts(private)) {
@@ -1090,6 +1095,10 @@
 				kfree(conf_data);
 				continue;
 			}
+			pos = pathmask_to_pos(lpm);
+			/* store per path conf_data */
+			private->path_conf_data[pos] =
+				(struct dasd_conf_data *) conf_data;
 			/*
 			 * build device UID that other path data
 			 * can be compared to it
@@ -1147,7 +1156,10 @@
 				path_data->cablepm |= lpm;
 				continue;
 			}
-
+			pos = pathmask_to_pos(lpm);
+			/* store per path conf_data */
+			private->path_conf_data[pos] =
+				(struct dasd_conf_data *) conf_data;
 			path_private.conf_data = NULL;
 			path_private.conf_len = 0;
 		}
@@ -1159,7 +1171,12 @@
 			path_data->ppm |= lpm;
 			break;
 		}
-		path_data->opm |= lpm;
+		if (!path_data->opm) {
+			path_data->opm = lpm;
+			dasd_generic_path_operational(device);
+		} else {
+			path_data->opm |= lpm;
+		}
 		/*
 		 * if the path is used
 		 * it should not be in one of the negative lists
@@ -4423,7 +4440,12 @@
 	private = (struct dasd_eckd_private *) device->private;
 
 	/* Read Configuration Data */
-	dasd_eckd_read_conf(device);
+	rc = dasd_eckd_read_conf(device);
+	if (rc) {
+		DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
+				"Read configuration data failed, rc=%d", rc);
+		goto out_err;
+	}
 
 	dasd_eckd_get_uid(device, &temp_uid);
 	/* Generate device unique id */
@@ -4439,13 +4461,18 @@
 	/* register lcu with alias handling, enable PAV if this is a new lcu */
 	rc = dasd_alias_make_device_known_to_lcu(device);
 	if (rc)
-		return rc;
+		goto out_err;
 
 	set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr_flags);
 	dasd_eckd_validate_server(device, cqr_flags);
 
 	/* RE-Read Configuration Data */
-	dasd_eckd_read_conf(device);
+	rc = dasd_eckd_read_conf(device);
+	if (rc) {
+		DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
+			"Read configuration data failed, rc=%d", rc);
+		goto out_err2;
+	}
 
 	/* Read Feature Codes */
 	dasd_eckd_read_features(device);
@@ -4456,7 +4483,7 @@
 	if (rc) {
 		DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
 				"Read device characteristic failed, rc=%d", rc);
-		goto out_err;
+		goto out_err2;
 	}
 	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
 	memcpy(&private->rdc_data, &temp_rdc_data, sizeof(temp_rdc_data));
@@ -4467,6 +4494,8 @@
 
 	return 0;
 
+out_err2:
+	dasd_alias_disconnect_device_from_lcu(device);
 out_err:
 	return -1;
 }
@@ -4671,7 +4700,7 @@
 			return conf_data;
 	}
 out:
-	return private->path_conf_data[8 - ffs(lpum)];
+	return private->path_conf_data[pathmask_to_pos(lpum)];
 }
 
 /*
@@ -4716,7 +4745,7 @@
 	for (path = 0x80; path; path >>= 1) {
 		/* initialise data per path */
 		bitmask = mask;
-		pos = 8 - ffs(path);
+		pos = pathmask_to_pos(path);
 		conf_data = private->path_conf_data[pos];
 		pos = 8 - ffs(cuir->ned_map);
 		ned = (char *) &conf_data->neds[pos];
@@ -4937,9 +4966,7 @@
 		      ((u64 *)cuir)[0], ((u64 *)cuir)[1], ((u64 *)cuir)[2],
 		      ((u32 *)cuir)[3]);
 	ccw_device_get_schid(device->cdev, &sch_id);
-	/* get position of path in mask */
-	pos = 8 - ffs(lpum);
-	/* get channel path descriptor from this position */
+	pos = pathmask_to_pos(lpum);
 	desc = ccw_device_get_chp_desc(device->cdev, pos);
 
 	if (cuir->code == CUIR_QUIESCE) {
diff --git a/drivers/s390/char/diag_ftp.c b/drivers/s390/char/diag_ftp.c
index 12db8db..a5ccbf6 100644
--- a/drivers/s390/char/diag_ftp.c
+++ b/drivers/s390/char/diag_ftp.c
@@ -15,6 +15,7 @@
 #include <linux/wait.h>
 #include <linux/string.h>
 #include <asm/ctl_reg.h>
+#include <asm/diag.h>
 
 #include "hmcdrv_ftp.h"
 #include "diag_ftp.h"
@@ -102,6 +103,7 @@
 {
 	int rc;
 
+	diag_stat_inc(DIAG_STAT_X2C4);
 	asm volatile(
 		"	diag	%[addr],%[cmd],0x2c4\n"
 		"0:	j	2f\n"
diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c
index 35a84af..6010cd3 100644
--- a/drivers/s390/char/sclp_rw.c
+++ b/drivers/s390/char/sclp_rw.c
@@ -47,9 +47,9 @@
 sclp_make_buffer(void *page, unsigned short columns, unsigned short htab)
 {
 	struct sclp_buffer *buffer;
-	struct write_sccb *sccb;
+	struct sccb_header *sccb;
 
-	sccb = (struct write_sccb *) page;
+	sccb = (struct sccb_header *) page;
 	/*
 	 * We keep the struct sclp_buffer structure at the end
 	 * of the sccb page.
@@ -57,24 +57,16 @@
 	buffer = ((struct sclp_buffer *) ((addr_t) sccb + PAGE_SIZE)) - 1;
 	buffer->sccb = sccb;
 	buffer->retry_count = 0;
-	buffer->mto_number = 0;
-	buffer->mto_char_sum = 0;
+	buffer->messages = 0;
+	buffer->char_sum = 0;
 	buffer->current_line = NULL;
 	buffer->current_length = 0;
 	buffer->columns = columns;
 	buffer->htab = htab;
 
 	/* initialize sccb */
-	memset(sccb, 0, sizeof(struct write_sccb));
-	sccb->header.length = sizeof(struct write_sccb);
-	sccb->msg_buf.header.length = sizeof(struct msg_buf);
-	sccb->msg_buf.header.type = EVTYP_MSG;
-	sccb->msg_buf.mdb.header.length = sizeof(struct mdb);
-	sccb->msg_buf.mdb.header.type = 1;
-	sccb->msg_buf.mdb.header.tag = 0xD4C4C240;	/* ebcdic "MDB " */
-	sccb->msg_buf.mdb.header.revision_code = 1;
-	sccb->msg_buf.mdb.go.length = sizeof(struct go);
-	sccb->msg_buf.mdb.go.type = 1;
+	memset(sccb, 0, sizeof(struct sccb_header));
+	sccb->length = sizeof(struct sccb_header);
 
 	return buffer;
 }
@@ -90,37 +82,49 @@
 }
 
 /*
- * Initialize a new Message Text Object (MTO) at the end of the provided buffer
- * with enough room for max_len characters. Return 0 on success.
+ * Initialize a new message the end of the provided buffer with
+ * enough room for max_len characters. Return 0 on success.
  */
 static int
 sclp_initialize_mto(struct sclp_buffer *buffer, int max_len)
 {
-	struct write_sccb *sccb;
+	struct sccb_header *sccb;
+	struct msg_buf *msg;
+	struct mdb *mdb;
+	struct go *go;
 	struct mto *mto;
-	int mto_size;
+	int msg_size;
 
-	/* max size of new Message Text Object including message text  */
-	mto_size = sizeof(struct mto) + max_len;
+	/* max size of new message including message text  */
+	msg_size = sizeof(struct msg_buf) + max_len;
 
 	/* check if current buffer sccb can contain the mto */
 	sccb = buffer->sccb;
-	if ((MAX_SCCB_ROOM - sccb->header.length) < mto_size)
+	if ((MAX_SCCB_ROOM - sccb->length) < msg_size)
 		return -ENOMEM;
 
-	/* find address of new message text object */
-	mto = (struct mto *)(((addr_t) sccb) + sccb->header.length);
+	msg = (struct msg_buf *)((addr_t) sccb + sccb->length);
+	memset(msg, 0, sizeof(struct msg_buf));
+	msg->header.length = sizeof(struct msg_buf);
+	msg->header.type = EVTYP_MSG;
 
-	/*
-	 * fill the new Message-Text Object,
-	 * starting behind the former last byte of the SCCB
-	 */
-	memset(mto, 0, sizeof(struct mto));
+	mdb = &msg->mdb;
+	mdb->header.length = sizeof(struct mdb);
+	mdb->header.type = 1;
+	mdb->header.tag = 0xD4C4C240;	/* ebcdic "MDB " */
+	mdb->header.revision_code = 1;
+
+	go = &mdb->go;
+	go->length = sizeof(struct go);
+	go->type = 1;
+
+	mto = &mdb->mto;
 	mto->length = sizeof(struct mto);
 	mto->type = 4;	/* message text object */
 	mto->line_type_flags = LNTPFLGS_ENDTEXT; /* end text */
 
 	/* set pointer to first byte after struct mto. */
+	buffer->current_msg = msg;
 	buffer->current_line = (char *) (mto + 1);
 	buffer->current_length = 0;
 
@@ -128,45 +132,37 @@
 }
 
 /*
- * Finalize MTO initialized by sclp_initialize_mto(), updating the sizes of
- * MTO, enclosing MDB, event buffer and SCCB.
+ * Finalize message initialized by sclp_initialize_mto(),
+ * updating the sizes of MTO, enclosing MDB, event buffer and SCCB.
  */
 static void
 sclp_finalize_mto(struct sclp_buffer *buffer)
 {
-	struct write_sccb *sccb;
-	struct mto *mto;
-	int str_len, mto_size;
-
-	str_len = buffer->current_length;
-	buffer->current_line = NULL;
-	buffer->current_length = 0;
-
-	/* real size of new Message Text Object including message text	*/
-	mto_size = sizeof(struct mto) + str_len;
-
-	/* find address of new message text object */
-	sccb = buffer->sccb;
-	mto = (struct mto *)(((addr_t) sccb) + sccb->header.length);
-
-	/* set size of message text object */
-	mto->length = mto_size;
+	struct sccb_header *sccb;
+	struct msg_buf *msg;
 
 	/*
 	 * update values of sizes
 	 * (SCCB, Event(Message) Buffer, Message Data Block)
 	 */
-	sccb->header.length += mto_size;
-	sccb->msg_buf.header.length += mto_size;
-	sccb->msg_buf.mdb.header.length += mto_size;
+	sccb = buffer->sccb;
+	msg = buffer->current_msg;
+	msg->header.length += buffer->current_length;
+	msg->mdb.header.length += buffer->current_length;
+	msg->mdb.mto.length += buffer->current_length;
+	sccb->length += msg->header.length;
 
 	/*
 	 * count number of buffered messages (= number of Message Text
 	 * Objects) and number of buffered characters
 	 * for the SCCB currently used for buffering and at all
 	 */
-	buffer->mto_number++;
-	buffer->mto_char_sum += str_len;
+	buffer->messages++;
+	buffer->char_sum += buffer->current_length;
+
+	buffer->current_line = NULL;
+	buffer->current_length = 0;
+	buffer->current_msg = NULL;
 }
 
 /*
@@ -218,7 +214,13 @@
 			break;
 		case '\a':	/* bell, one for several times	*/
 			/* set SCLP sound alarm bit in General Object */
-			buffer->sccb->msg_buf.mdb.go.general_msg_flags |=
+			if (buffer->current_line == NULL) {
+				rc = sclp_initialize_mto(buffer,
+							 buffer->columns);
+				if (rc)
+					return i_msg;
+			}
+			buffer->current_msg->mdb.go.general_msg_flags |=
 				GNRLMSGFLGS_SNDALRM;
 			break;
 		case '\t':	/* horizontal tabulator	 */
@@ -309,11 +311,13 @@
 int
 sclp_buffer_space(struct sclp_buffer *buffer)
 {
+	struct sccb_header *sccb;
 	int count;
 
-	count = MAX_SCCB_ROOM - buffer->sccb->header.length;
+	sccb = buffer->sccb;
+	count = MAX_SCCB_ROOM - sccb->length;
 	if (buffer->current_line != NULL)
-		count -= sizeof(struct mto) + buffer->current_length;
+		count -= sizeof(struct msg_buf) + buffer->current_length;
 	return count;
 }
 
@@ -325,7 +329,7 @@
 {
 	int count;
 
-	count = buffer->mto_char_sum;
+	count = buffer->char_sum;
 	if (buffer->current_line != NULL)
 		count += buffer->current_length;
 	return count;
@@ -378,7 +382,7 @@
 {
 	int rc;
 	struct sclp_buffer *buffer;
-	struct write_sccb *sccb;
+	struct sccb_header *sccb;
 
 	buffer = (struct sclp_buffer *) data;
 	sccb = buffer->sccb;
@@ -389,7 +393,7 @@
 		return;
 	}
 	/* check SCLP response code and choose suitable action	*/
-	switch (sccb->header.response_code) {
+	switch (sccb->response_code) {
 	case 0x0020 :
 		/* Normal completion, buffer processed, message(s) sent */
 		rc = 0;
@@ -403,7 +407,7 @@
 		/* remove processed buffers and requeue rest */
 		if (sclp_remove_processed((struct sccb_header *) sccb) > 0) {
 			/* not all buffers were processed */
-			sccb->header.response_code = 0x0000;
+			sccb->response_code = 0x0000;
 			buffer->request.status = SCLP_REQ_FILLED;
 			rc = sclp_add_request(request);
 			if (rc == 0)
@@ -419,14 +423,14 @@
 			break;
 		}
 		/* retry request */
-		sccb->header.response_code = 0x0000;
+		sccb->response_code = 0x0000;
 		buffer->request.status = SCLP_REQ_FILLED;
 		rc = sclp_add_request(request);
 		if (rc == 0)
 			return;
 		break;
 	default:
-		if (sccb->header.response_code == 0x71f0)
+		if (sccb->response_code == 0x71f0)
 			rc = -ENOMEM;
 		else
 			rc = -EINVAL;
@@ -445,25 +449,19 @@
 sclp_emit_buffer(struct sclp_buffer *buffer,
 		 void (*callback)(struct sclp_buffer *, int))
 {
-	struct write_sccb *sccb;
-
 	/* add current line if there is one */
 	if (buffer->current_line != NULL)
 		sclp_finalize_mto(buffer);
 
 	/* Are there messages in the output buffer ? */
-	if (buffer->mto_number == 0)
+	if (buffer->messages == 0)
 		return -EIO;
 
-	sccb = buffer->sccb;
-	/* Use normal write message */
-	sccb->msg_buf.header.type = EVTYP_MSG;
-
 	buffer->request.command = SCLP_CMDW_WRITE_EVENT_DATA;
 	buffer->request.status = SCLP_REQ_FILLED;
 	buffer->request.callback = sclp_writedata_callback;
 	buffer->request.callback_data = buffer;
-	buffer->request.sccb = sccb;
+	buffer->request.sccb = buffer->sccb;
 	buffer->callback = callback;
 	return sclp_add_request(&buffer->request);
 }
diff --git a/drivers/s390/char/sclp_rw.h b/drivers/s390/char/sclp_rw.h
index 7a7bfc9..e3b0290 100644
--- a/drivers/s390/char/sclp_rw.h
+++ b/drivers/s390/char/sclp_rw.h
@@ -45,6 +45,7 @@
 struct mdb {
 	struct mdb_header header;
 	struct go go;
+	struct mto mto;
 } __attribute__((packed));
 
 struct msg_buf {
@@ -52,14 +53,9 @@
 	struct mdb mdb;
 } __attribute__((packed));
 
-struct write_sccb {
-	struct sccb_header header;
-	struct msg_buf msg_buf;
-} __attribute__((packed));
-
 /* The number of empty mto buffers that can be contained in a single sccb. */
-#define NR_EMPTY_MTO_PER_SCCB ((PAGE_SIZE - sizeof(struct sclp_buffer) - \
-			sizeof(struct write_sccb)) / sizeof(struct mto))
+#define NR_EMPTY_MSG_PER_SCCB ((PAGE_SIZE - sizeof(struct sclp_buffer) - \
+			sizeof(struct sccb_header)) / sizeof(struct msg_buf))
 
 /*
  * data structure for information about list of SCCBs (only for writing),
@@ -68,7 +64,8 @@
 struct sclp_buffer {
 	struct list_head list;		/* list_head for sccb_info chain */
 	struct sclp_req request;
-	struct write_sccb *sccb;
+	void *sccb;
+	struct msg_buf *current_msg;
 	char *current_line;
 	int current_length;
 	int retry_count;
@@ -76,8 +73,8 @@
 	unsigned short columns;
 	unsigned short htab;
 	/* statistics about this buffer */
-	unsigned int mto_char_sum;	/* # chars in sccb */
-	unsigned int mto_number;	/* # mtos in sccb */
+	unsigned int char_sum;		/* # chars in sccb */
+	unsigned int messages;		/* # messages in sccb */
 	/* Callback that is called after reaching final status. */
 	void (*callback)(struct sclp_buffer *, int);
 };
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index 0036632..3c6e174 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -84,8 +84,8 @@
  * to change as output buffers get emptied, or if the output flow
  * control is acted. This is not an exact number because not every
  * character needs the same space in the sccb. The worst case is
- * a string of newlines. Every newlines creates a new mto which
- * needs 8 bytes.
+ * a string of newlines. Every newline creates a new message which
+ * needs 82 bytes.
  */
 static int
 sclp_tty_write_room (struct tty_struct *tty)
@@ -97,9 +97,9 @@
 	spin_lock_irqsave(&sclp_tty_lock, flags);
 	count = 0;
 	if (sclp_ttybuf != NULL)
-		count = sclp_buffer_space(sclp_ttybuf) / sizeof(struct mto);
+		count = sclp_buffer_space(sclp_ttybuf) / sizeof(struct msg_buf);
 	list_for_each(l, &sclp_tty_pages)
-		count += NR_EMPTY_MTO_PER_SCCB;
+		count += NR_EMPTY_MSG_PER_SCCB;
 	spin_unlock_irqrestore(&sclp_tty_lock, flags);
 	return count;
 }
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 07fc5d9..b5620e8 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -476,26 +476,6 @@
 	return 0;
 }
 
-static int cio_validate_io_subchannel(struct subchannel *sch)
-{
-	/* Initialization for io subchannels. */
-	if (!css_sch_is_valid(&sch->schib))
-		return -ENODEV;
-
-	/* Devno is valid. */
-	return cio_check_devno_blacklisted(sch);
-}
-
-static int cio_validate_msg_subchannel(struct subchannel *sch)
-{
-	/* Initialization for message subchannels. */
-	if (!css_sch_is_valid(&sch->schib))
-		return -ENODEV;
-
-	/* Devno is valid. */
-	return cio_check_devno_blacklisted(sch);
-}
-
 /**
  * cio_validate_subchannel - basic validation of subchannel
  * @sch: subchannel structure to be filled out
@@ -533,10 +513,11 @@
 
 	switch (sch->st) {
 	case SUBCHANNEL_TYPE_IO:
-		err = cio_validate_io_subchannel(sch);
-		break;
 	case SUBCHANNEL_TYPE_MSG:
-		err = cio_validate_msg_subchannel(sch);
+		if (!css_sch_is_valid(&sch->schib))
+			err = -ENODEV;
+		else
+			err = cio_check_devno_blacklisted(sch);
 		break;
 	default:
 		err = 0;
@@ -826,11 +807,11 @@
 static void s390_reset_chpids_mcck_handler(void)
 {
 	struct crw crw;
-	struct mci *mci;
+	union mci mci;
 
 	/* Check for pending channel report word. */
-	mci = (struct mci *)&S390_lowcore.mcck_interruption_code;
-	if (!mci->cp)
+	mci.val = S390_lowcore.mcck_interruption_code;
+	if (!mci.cp)
 		return;
 	/* Process channel report words. */
 	while (stcrw(&crw) == 0) {
diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
index 23054f8..b2afad5 100644
--- a/drivers/s390/cio/cmf.c
+++ b/drivers/s390/cio/cmf.c
@@ -113,7 +113,6 @@
  * @readall:	read a measurement block in a common format
  * @reset:	clear the data in the associated measurement block and
  *		reset its time stamp
- * @align:	align an allocated block so that the hardware can use it
  */
 struct cmb_operations {
 	int  (*alloc)  (struct ccw_device *);
@@ -122,7 +121,6 @@
 	u64  (*read)   (struct ccw_device *, int);
 	int  (*readall)(struct ccw_device *, struct cmbdata *);
 	void (*reset)  (struct ccw_device *);
-	void *(*align) (void *);
 /* private: */
 	struct attribute_group *attr_group;
 };
@@ -186,9 +184,8 @@
 static int set_schib(struct ccw_device *cdev, u32 mme, int mbfc,
 		     unsigned long address)
 {
-	struct subchannel *sch;
-
-	sch = to_subchannel(cdev->dev.parent);
+	struct subchannel *sch = to_subchannel(cdev->dev.parent);
+	int ret;
 
 	sch->config.mme = mme;
 	sch->config.mbfc = mbfc;
@@ -198,7 +195,15 @@
 	else
 		sch->config.mbi = address;
 
-	return cio_commit_config(sch);
+	ret = cio_commit_config(sch);
+	if (!mme && ret == -ENODEV) {
+		/*
+		 * The task was to disable measurement block updates but
+		 * the subchannel is already gone. Report success.
+		 */
+		ret = 0;
+	}
+	return ret;
 }
 
 struct set_schib_struct {
@@ -314,7 +319,7 @@
 			return -EBUSY;
 	}
 	cmb_data = cdev->private->cmb;
-	hw_block = cmbops->align(cmb_data->hw_block);
+	hw_block = cmb_data->hw_block;
 	if (!memcmp(cmb_data->last_block, hw_block, cmb_data->size))
 		/* No need to copy. */
 		return 0;
@@ -425,7 +430,7 @@
 		 * Need to reset hw block as well to make the hardware start
 		 * from 0 again.
 		 */
-		memset(cmbops->align(cmb_data->hw_block), 0, cmb_data->size);
+		memset(cmb_data->hw_block, 0, cmb_data->size);
 		cmb_data->last_update = 0;
 	}
 	cdev->private->cmb_start_time = get_tod_clock();
@@ -606,12 +611,6 @@
 	spin_lock_irq(cdev->ccwlock);
 
 	priv = cdev->private;
-
-	if (list_empty(&priv->cmb_list)) {
-		/* already freed */
-		goto out;
-	}
-
 	cmb_data = priv->cmb;
 	priv->cmb = NULL;
 	if (cmb_data)
@@ -626,7 +625,6 @@
 		free_pages((unsigned long)cmb_area.mem, get_order(size));
 		cmb_area.mem = NULL;
 	}
-out:
 	spin_unlock_irq(cdev->ccwlock);
 	spin_unlock(&cmb_area.lock);
 }
@@ -755,11 +753,6 @@
 	cmf_generic_reset(cdev);
 }
 
-static void * align_cmb(void *area)
-{
-	return area;
-}
-
 static struct attribute_group cmf_attr_group;
 
 static struct cmb_operations cmbops_basic = {
@@ -769,7 +762,6 @@
 	.read	= read_cmb,
 	.readall    = readall_cmb,
 	.reset	    = reset_cmb,
-	.align	    = align_cmb,
 	.attr_group = &cmf_attr_group,
 };
 
@@ -804,64 +796,57 @@
 	u32 device_busy_time;
 	u32 initial_command_response_time;
 	u32 reserved[7];
-};
+} __packed __aligned(64);
 
-/*
- * kmalloc only guarantees 8 byte alignment, but we need cmbe
- * pointers to be naturally aligned. Make sure to allocate
- * enough space for two cmbes.
- */
-static inline struct cmbe *cmbe_align(struct cmbe *c)
-{
-	unsigned long addr;
-	addr = ((unsigned long)c + sizeof (struct cmbe) - sizeof(long)) &
-				 ~(sizeof (struct cmbe) - sizeof(long));
-	return (struct cmbe*)addr;
-}
+static struct kmem_cache *cmbe_cache;
 
 static int alloc_cmbe(struct ccw_device *cdev)
 {
-	struct cmbe *cmbe;
 	struct cmb_data *cmb_data;
-	int ret;
+	struct cmbe *cmbe;
+	int ret = -ENOMEM;
 
-	cmbe = kzalloc (sizeof (*cmbe) * 2, GFP_KERNEL);
+	cmbe = kmem_cache_zalloc(cmbe_cache, GFP_KERNEL);
 	if (!cmbe)
-		return -ENOMEM;
-	cmb_data = kzalloc(sizeof(struct cmb_data), GFP_KERNEL);
-	if (!cmb_data) {
-		ret = -ENOMEM;
+		return ret;
+
+	cmb_data = kzalloc(sizeof(*cmb_data), GFP_KERNEL);
+	if (!cmb_data)
 		goto out_free;
-	}
+
 	cmb_data->last_block = kzalloc(sizeof(struct cmbe), GFP_KERNEL);
-	if (!cmb_data->last_block) {
-		ret = -ENOMEM;
+	if (!cmb_data->last_block)
 		goto out_free;
-	}
-	cmb_data->size = sizeof(struct cmbe);
-	spin_lock_irq(cdev->ccwlock);
-	if (cdev->private->cmb) {
-		spin_unlock_irq(cdev->ccwlock);
-		ret = -EBUSY;
-		goto out_free;
-	}
+
+	cmb_data->size = sizeof(*cmbe);
 	cmb_data->hw_block = cmbe;
+
+	spin_lock(&cmb_area.lock);
+	spin_lock_irq(cdev->ccwlock);
+	if (cdev->private->cmb)
+		goto out_unlock;
+
 	cdev->private->cmb = cmb_data;
-	spin_unlock_irq(cdev->ccwlock);
 
 	/* activate global measurement if this is the first channel */
-	spin_lock(&cmb_area.lock);
 	if (list_empty(&cmb_area.list))
 		cmf_activate(NULL, 1);
 	list_add_tail(&cdev->private->cmb_list, &cmb_area.list);
-	spin_unlock(&cmb_area.lock);
 
+	spin_unlock_irq(cdev->ccwlock);
+	spin_unlock(&cmb_area.lock);
 	return 0;
+
+out_unlock:
+	spin_unlock_irq(cdev->ccwlock);
+	spin_unlock(&cmb_area.lock);
+	ret = -EBUSY;
 out_free:
 	if (cmb_data)
 		kfree(cmb_data->last_block);
 	kfree(cmb_data);
-	kfree(cmbe);
+	kmem_cache_free(cmbe_cache, cmbe);
+
 	return ret;
 }
 
@@ -869,19 +854,21 @@
 {
 	struct cmb_data *cmb_data;
 
+	spin_lock(&cmb_area.lock);
 	spin_lock_irq(cdev->ccwlock);
 	cmb_data = cdev->private->cmb;
 	cdev->private->cmb = NULL;
-	if (cmb_data)
+	if (cmb_data) {
 		kfree(cmb_data->last_block);
+		kmem_cache_free(cmbe_cache, cmb_data->hw_block);
+	}
 	kfree(cmb_data);
-	spin_unlock_irq(cdev->ccwlock);
 
 	/* deactivate global measurement if this is the last channel */
-	spin_lock(&cmb_area.lock);
 	list_del_init(&cdev->private->cmb_list);
 	if (list_empty(&cmb_area.list))
 		cmf_activate(NULL, 0);
+	spin_unlock_irq(cdev->ccwlock);
 	spin_unlock(&cmb_area.lock);
 }
 
@@ -897,7 +884,7 @@
 		return -EINVAL;
 	}
 	cmb_data = cdev->private->cmb;
-	mba = mme ? (unsigned long) cmbe_align(cmb_data->hw_block) : 0;
+	mba = mme ? (unsigned long) cmb_data->hw_block : 0;
 	spin_unlock_irqrestore(cdev->ccwlock, flags);
 
 	return set_schib_wait(cdev, mme, 1, mba);
@@ -1022,11 +1009,6 @@
 	cmf_generic_reset(cdev);
 }
 
-static void * align_cmbe(void *area)
-{
-	return cmbe_align(area);
-}
-
 static struct attribute_group cmf_attr_group_ext;
 
 static struct cmb_operations cmbops_extended = {
@@ -1036,7 +1018,6 @@
 	.read	    = read_cmbe,
 	.readall    = readall_cmbe,
 	.reset	    = reset_cmbe,
-	.align	    = align_cmbe,
 	.attr_group = &cmf_attr_group_ext,
 };
 
@@ -1171,23 +1152,28 @@
 			       struct device_attribute *attr,
 			       char *buf)
 {
-	return sprintf(buf, "%d\n", to_ccwdev(dev)->private->cmb ? 1 : 0);
+	struct ccw_device *cdev = to_ccwdev(dev);
+	int enabled;
+
+	spin_lock_irq(cdev->ccwlock);
+	enabled = !!cdev->private->cmb;
+	spin_unlock_irq(cdev->ccwlock);
+
+	return sprintf(buf, "%d\n", enabled);
 }
 
 static ssize_t cmb_enable_store(struct device *dev,
 				struct device_attribute *attr, const char *buf,
 				size_t c)
 {
-	struct ccw_device *cdev;
-	int ret;
+	struct ccw_device *cdev = to_ccwdev(dev);
 	unsigned long val;
+	int ret;
 
 	ret = kstrtoul(buf, 16, &val);
 	if (ret)
 		return ret;
 
-	cdev = to_ccwdev(dev);
-
 	switch (val) {
 	case 0:
 		ret = disable_cmf(cdev);
@@ -1195,12 +1181,13 @@
 	case 1:
 		ret = enable_cmf(cdev);
 		break;
+	default:
+		ret = -EINVAL;
 	}
 
-	return c;
+	return ret ? ret : c;
 }
-
-DEVICE_ATTR(cmb_enable, 0644, cmb_enable_show, cmb_enable_store);
+DEVICE_ATTR_RW(cmb_enable);
 
 int ccw_set_cmf(struct ccw_device *cdev, int enable)
 {
@@ -1220,20 +1207,51 @@
 {
 	int ret;
 
+	device_lock(&cdev->dev);
+	get_device(&cdev->dev);
 	ret = cmbops->alloc(cdev);
-	cmbops->reset(cdev);
 	if (ret)
-		return ret;
-	ret = cmbops->set(cdev, 2);
+		goto out;
+	cmbops->reset(cdev);
+	ret = sysfs_create_group(&cdev->dev.kobj, cmbops->attr_group);
 	if (ret) {
 		cmbops->free(cdev);
-		return ret;
+		goto out;
 	}
-	ret = sysfs_create_group(&cdev->dev.kobj, cmbops->attr_group);
-	if (!ret)
-		return 0;
-	cmbops->set(cdev, 0);  //FIXME: this can fail
+	ret = cmbops->set(cdev, 2);
+	if (ret) {
+		sysfs_remove_group(&cdev->dev.kobj, cmbops->attr_group);
+		cmbops->free(cdev);
+	}
+out:
+	if (ret)
+		put_device(&cdev->dev);
+
+	device_unlock(&cdev->dev);
+	return ret;
+}
+
+/**
+ * __disable_cmf() - switch off the channel measurement for a specific device
+ *  @cdev:	The ccw device to be disabled
+ *
+ *  Returns %0 for success or a negative error value.
+ *
+ *  Context:
+ *    non-atomic, device_lock() held.
+ */
+int __disable_cmf(struct ccw_device *cdev)
+{
+	int ret;
+
+	ret = cmbops->set(cdev, 0);
+	if (ret)
+		return ret;
+
+	sysfs_remove_group(&cdev->dev.kobj, cmbops->attr_group);
 	cmbops->free(cdev);
+	put_device(&cdev->dev);
+
 	return ret;
 }
 
@@ -1250,11 +1268,10 @@
 {
 	int ret;
 
-	ret = cmbops->set(cdev, 0);
-	if (ret)
-		return ret;
-	cmbops->free(cdev);
-	sysfs_remove_group(&cdev->dev.kobj, cmbops->attr_group);
+	device_lock(&cdev->dev);
+	ret = __disable_cmf(cdev);
+	device_unlock(&cdev->dev);
+
 	return ret;
 }
 
@@ -1295,10 +1312,32 @@
 	return cmbops->set(cdev, 2);
 }
 
+/**
+ * cmf_reactivate() - reactivate measurement block updates
+ *
+ * Use this during resume from hibernate.
+ */
+void cmf_reactivate(void)
+{
+	spin_lock(&cmb_area.lock);
+	if (!list_empty(&cmb_area.list))
+		cmf_activate(cmb_area.mem, 1);
+	spin_unlock(&cmb_area.lock);
+}
+
+static int __init init_cmbe(void)
+{
+	cmbe_cache = kmem_cache_create("cmbe_cache", sizeof(struct cmbe),
+				       __alignof__(struct cmbe), 0, NULL);
+
+	return cmbe_cache ? 0 : -ENOMEM;
+}
+
 static int __init init_cmf(void)
 {
 	char *format_string;
-	char *detect_string = "parameter";
+	char *detect_string;
+	int ret;
 
 	/*
 	 * If the user did not give a parameter, see if we are running on a
@@ -1324,15 +1363,18 @@
 	case CMF_EXTENDED:
 		format_string = "extended";
 		cmbops = &cmbops_extended;
+
+		ret = init_cmbe();
+		if (ret)
+			return ret;
 		break;
 	default:
-		return 1;
+		return -EINVAL;
 	}
 	pr_info("Channel measurement facility initialized using format "
 		"%s (mode %s)\n", format_string, detect_string);
 	return 0;
 }
-
 module_init(init_cmf);
 
 
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 0268e5f..2ee3053 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -44,7 +44,6 @@
 	int ret;
 
 	init_subchannel_id(&schid);
-	ret = -ENODEV;
 	do {
 		do {
 			ret = fn(schid, data);
@@ -1089,6 +1088,7 @@
 		if (chp)
 			chp_update_desc(chp);
 	}
+	cmf_reactivate();
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index dfef5e6..6aae684 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1787,6 +1787,8 @@
 	cdev->drv = NULL;
 	cdev->private->int_class = IRQIO_CIO;
 	spin_unlock_irq(cdev->ccwlock);
+	__disable_cmf(cdev);
+
 	return 0;
 }
 
@@ -1797,7 +1799,7 @@
 	cdev = to_ccwdev(dev);
 	if (cdev->drv && cdev->drv->shutdown)
 		cdev->drv->shutdown(cdev);
-	disable_cmf(cdev);
+	__disable_cmf(cdev);
 }
 
 static int ccw_device_pm_prepare(struct device *dev)
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index 8d1d298..065b1be 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -125,11 +125,6 @@
 void ccw_device_disband_start(struct ccw_device *);
 void ccw_device_disband_done(struct ccw_device *, int);
 
-void ccw_device_stlck_start(struct ccw_device *, void *, void *, void *);
-void ccw_device_stlck_done(struct ccw_device *, void *, int);
-
-int ccw_device_call_handler(struct ccw_device *);
-
 int ccw_device_stlck(struct ccw_device *);
 
 /* Helper function for machine check handling. */
@@ -145,6 +140,7 @@
 void retry_set_schib(struct ccw_device *cdev);
 void cmf_retry_copy_block(struct ccw_device *);
 int cmf_reenable(struct ccw_device *);
+void cmf_reactivate(void);
 int ccw_set_cmf(struct ccw_device *cdev, int enable);
 extern struct device_attribute dev_attr_cmb_enable;
 #endif
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 83da53c8..92e03b4 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -731,6 +731,44 @@
 }
 
 /*
+ * Pass interrupt to device driver.
+ */
+static int ccw_device_call_handler(struct ccw_device *cdev)
+{
+	unsigned int stctl;
+	int ending_status;
+
+	/*
+	 * we allow for the device action handler if .
+	 *  - we received ending status
+	 *  - the action handler requested to see all interrupts
+	 *  - we received an intermediate status
+	 *  - fast notification was requested (primary status)
+	 *  - unsolicited interrupts
+	 */
+	stctl = scsw_stctl(&cdev->private->irb.scsw);
+	ending_status = (stctl & SCSW_STCTL_SEC_STATUS) ||
+		(stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) ||
+		(stctl == SCSW_STCTL_STATUS_PEND);
+	if (!ending_status &&
+	    !cdev->private->options.repall &&
+	    !(stctl & SCSW_STCTL_INTER_STATUS) &&
+	    !(cdev->private->options.fast &&
+	      (stctl & SCSW_STCTL_PRIM_STATUS)))
+		return 0;
+
+	if (ending_status)
+		ccw_device_set_timeout(cdev, 0);
+
+	if (cdev->handler)
+		cdev->handler(cdev, cdev->private->intparm,
+			      &cdev->private->irb);
+
+	memset(&cdev->private->irb, 0, sizeof(struct irb));
+	return 1;
+}
+
+/*
  * Got an interrupt for a normal io (state online).
  */
 static void
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 6acd0b5..a69f702 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -412,52 +412,6 @@
 	return cio_resume(sch);
 }
 
-/*
- * Pass interrupt to device driver.
- */
-int
-ccw_device_call_handler(struct ccw_device *cdev)
-{
-	unsigned int stctl;
-	int ending_status;
-
-	/*
-	 * we allow for the device action handler if .
-	 *  - we received ending status
-	 *  - the action handler requested to see all interrupts
-	 *  - we received an intermediate status
-	 *  - fast notification was requested (primary status)
-	 *  - unsolicited interrupts
-	 */
-	stctl = scsw_stctl(&cdev->private->irb.scsw);
-	ending_status = (stctl & SCSW_STCTL_SEC_STATUS) ||
-		(stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) ||
-		(stctl == SCSW_STCTL_STATUS_PEND);
-	if (!ending_status &&
-	    !cdev->private->options.repall &&
-	    !(stctl & SCSW_STCTL_INTER_STATUS) &&
-	    !(cdev->private->options.fast &&
-	      (stctl & SCSW_STCTL_PRIM_STATUS)))
-		return 0;
-
-	/* Clear pending timers for device driver initiated I/O. */
-	if (ending_status)
-		ccw_device_set_timeout(cdev, 0);
-	/*
-	 * Now we are ready to call the device driver interrupt handler.
-	 */
-	if (cdev->handler)
-		cdev->handler(cdev, cdev->private->intparm,
-			      &cdev->private->irb);
-
-	/*
-	 * Clear the old and now useless interrupt response block.
-	 */
-	memset(&cdev->private->irb, 0, sizeof(struct irb));
-
-	return 1;
-}
-
 /**
  * ccw_device_get_ciw() - Search for CIW command in extended sense data.
  * @cdev: ccw device to inspect
@@ -502,67 +456,6 @@
 	return sch->lpm;
 }
 
-struct stlck_data {
-	struct completion done;
-	int rc;
-};
-
-void ccw_device_stlck_done(struct ccw_device *cdev, void *data, int rc)
-{
-	struct stlck_data *sdata = data;
-
-	sdata->rc = rc;
-	complete(&sdata->done);
-}
-
-/*
- * Perform unconditional reserve + release.
- */
-int ccw_device_stlck(struct ccw_device *cdev)
-{
-	struct subchannel *sch = to_subchannel(cdev->dev.parent);
-	struct stlck_data data;
-	u8 *buffer;
-	int rc;
-
-	/* Check if steal lock operation is valid for this device. */
-	if (cdev->drv) {
-		if (!cdev->private->options.force)
-			return -EINVAL;
-	}
-	buffer = kzalloc(64, GFP_DMA | GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-	init_completion(&data.done);
-	data.rc = -EIO;
-	spin_lock_irq(sch->lock);
-	rc = cio_enable_subchannel(sch, (u32) (addr_t) sch);
-	if (rc)
-		goto out_unlock;
-	/* Perform operation. */
-	cdev->private->state = DEV_STATE_STEAL_LOCK;
-	ccw_device_stlck_start(cdev, &data, &buffer[0], &buffer[32]);
-	spin_unlock_irq(sch->lock);
-	/* Wait for operation to finish. */
-	if (wait_for_completion_interruptible(&data.done)) {
-		/* Got a signal. */
-		spin_lock_irq(sch->lock);
-		ccw_request_cancel(cdev);
-		spin_unlock_irq(sch->lock);
-		wait_for_completion(&data.done);
-	}
-	rc = data.rc;
-	/* Check results. */
-	spin_lock_irq(sch->lock);
-	cio_disable_subchannel(sch);
-	cdev->private->state = DEV_STATE_BOXED;
-out_unlock:
-	spin_unlock_irq(sch->lock);
-	kfree(buffer);
-
-	return rc;
-}
-
 /**
  * chp_get_chp_desc - return newly allocated channel-path descriptor
  * @cdev: device to obtain the descriptor for
diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
index 37ada05e..da246b6 100644
--- a/drivers/s390/cio/device_pgid.c
+++ b/drivers/s390/cio/device_pgid.c
@@ -9,9 +9,10 @@
 
 #include <linux/kernel.h>
 #include <linux/string.h>
+#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/bitops.h>
+#include <linux/slab.h>
 #include <asm/ccwdev.h>
 #include <asm/cio.h>
 
@@ -133,7 +134,7 @@
 {
 	struct ccw_request *req = &cdev->private->req;
 	struct ccw1 *cp = cdev->private->iccws;
-	int i = 8 - ffs(req->lpm);
+	int i = pathmask_to_pos(req->lpm);
 	struct pgid *pgid = &cdev->private->pgid[i];
 
 	pgid->inf.fc	= fn;
@@ -434,7 +435,7 @@
 {
 	struct ccw_request *req = &cdev->private->req;
 	struct ccw1 *cp = cdev->private->iccws;
-	int i = 8 - ffs(req->lpm);
+	int i = pathmask_to_pos(req->lpm);
 
 	/* Channel program setup. */
 	cp->cmd_code	= CCW_CMD_SENSE_PGID;
@@ -616,6 +617,11 @@
 	ccw_request_start(cdev);
 }
 
+struct stlck_data {
+	struct completion done;
+	int rc;
+};
+
 static void stlck_build_cp(struct ccw_device *cdev, void *buf1, void *buf2)
 {
 	struct ccw_request *req = &cdev->private->req;
@@ -634,7 +640,10 @@
 
 static void stlck_callback(struct ccw_device *cdev, void *data, int rc)
 {
-	ccw_device_stlck_done(cdev, data, rc);
+	struct stlck_data *sdata = data;
+
+	sdata->rc = rc;
+	complete(&sdata->done);
 }
 
 /**
@@ -645,11 +654,9 @@
  * @buf2: data pointer used in channel program
  *
  * Execute a channel program on @cdev to release an existing PGID reservation.
- * When finished, call ccw_device_stlck_done with a return code specifying the
- * result.
  */
-void ccw_device_stlck_start(struct ccw_device *cdev, void *data, void *buf1,
-			    void *buf2)
+static void ccw_device_stlck_start(struct ccw_device *cdev, void *data,
+				   void *buf1, void *buf2)
 {
 	struct subchannel *sch = to_subchannel(cdev->dev.parent);
 	struct ccw_request *req = &cdev->private->req;
@@ -667,3 +674,50 @@
 	ccw_request_start(cdev);
 }
 
+/*
+ * Perform unconditional reserve + release.
+ */
+int ccw_device_stlck(struct ccw_device *cdev)
+{
+	struct subchannel *sch = to_subchannel(cdev->dev.parent);
+	struct stlck_data data;
+	u8 *buffer;
+	int rc;
+
+	/* Check if steal lock operation is valid for this device. */
+	if (cdev->drv) {
+		if (!cdev->private->options.force)
+			return -EINVAL;
+	}
+	buffer = kzalloc(64, GFP_DMA | GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+	init_completion(&data.done);
+	data.rc = -EIO;
+	spin_lock_irq(sch->lock);
+	rc = cio_enable_subchannel(sch, (u32) (addr_t) sch);
+	if (rc)
+		goto out_unlock;
+	/* Perform operation. */
+	cdev->private->state = DEV_STATE_STEAL_LOCK;
+	ccw_device_stlck_start(cdev, &data, &buffer[0], &buffer[32]);
+	spin_unlock_irq(sch->lock);
+	/* Wait for operation to finish. */
+	if (wait_for_completion_interruptible(&data.done)) {
+		/* Got a signal. */
+		spin_lock_irq(sch->lock);
+		ccw_request_cancel(cdev);
+		spin_unlock_irq(sch->lock);
+		wait_for_completion(&data.done);
+	}
+	rc = data.rc;
+	/* Check results. */
+	spin_lock_irq(sch->lock);
+	cio_disable_subchannel(sch);
+	cdev->private->state = DEV_STATE_BOXED;
+out_unlock:
+	spin_unlock_irq(sch->lock);
+	kfree(buffer);
+
+	return rc;
+}
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index 771faf7..57f710b 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -3,6 +3,6 @@
 #
 
 ap-objs := ap_bus.o
-obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o zcrypt_pcicc.o zcrypt_pcixcc.o
-obj-$(CONFIG_ZCRYPT) += zcrypt_pcica.o zcrypt_cex2a.o zcrypt_cex4.o
+obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o zcrypt_pcixcc.o
+obj-$(CONFIG_ZCRYPT) += zcrypt_cex2a.o zcrypt_cex4.o
 obj-$(CONFIG_ZCRYPT) += zcrypt_msgtype6.o zcrypt_msgtype50.o
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index d78b3d6..9cb3dfb 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -37,6 +37,7 @@
 #include <linux/notifier.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
+#include <linux/suspend.h>
 #include <asm/reset.h>
 #include <asm/airq.h>
 #include <linux/atomic.h>
@@ -48,23 +49,6 @@
 
 #include "ap_bus.h"
 
-/* Some prototypes. */
-static void ap_scan_bus(struct work_struct *);
-static void ap_poll_all(unsigned long);
-static enum hrtimer_restart ap_poll_timeout(struct hrtimer *);
-static int ap_poll_thread_start(void);
-static void ap_poll_thread_stop(void);
-static void ap_request_timeout(unsigned long);
-static inline void ap_schedule_poll_timer(void);
-static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags);
-static int ap_device_remove(struct device *dev);
-static int ap_device_probe(struct device *dev);
-static void ap_interrupt_handler(struct airq_struct *airq);
-static void ap_reset(struct ap_device *ap_dev, unsigned long *flags);
-static void ap_config_timeout(unsigned long ptr);
-static int ap_select_domain(void);
-static void ap_query_configuration(void);
-
 /*
  * Module description.
  */
@@ -92,17 +76,18 @@
 static LIST_HEAD(ap_device_list);
 
 /*
- * Workqueue & timer for bus rescan.
+ * Workqueue timer for bus rescan.
  */
-static struct workqueue_struct *ap_work_queue;
 static struct timer_list ap_config_timer;
 static int ap_config_time = AP_CONFIG_TIME;
-static DECLARE_WORK(ap_config_work, ap_scan_bus);
+static void ap_scan_bus(struct work_struct *);
+static DECLARE_WORK(ap_scan_work, ap_scan_bus);
 
 /*
  * Tasklet & timer for AP request polling and interrupts
  */
-static DECLARE_TASKLET(ap_tasklet, ap_poll_all, 0);
+static void ap_tasklet_fn(unsigned long);
+static DECLARE_TASKLET(ap_tasklet, ap_tasklet_fn, 0);
 static atomic_t ap_poll_requests = ATOMIC_INIT(0);
 static DECLARE_WAIT_QUEUE_HEAD(ap_poll_wait);
 static struct task_struct *ap_poll_kthread = NULL;
@@ -115,6 +100,8 @@
 
 /* Suspend flag */
 static int ap_suspend_flag;
+/* Maximum domain id */
+static int ap_max_domain_id;
 /* Flag to check if domain was set through module parameter domain=. This is
  * important when supsend and resume is done in a z/VM environment where the
  * domain might change. */
@@ -122,6 +109,8 @@
 static struct bus_type ap_bus_type;
 
 /* Adapter interrupt definitions */
+static void ap_interrupt_handler(struct airq_struct *airq);
+
 static int ap_airq_flag;
 
 static struct airq_struct ap_airq = {
@@ -182,44 +171,27 @@
 /**
  * ap_test_queue(): Test adjunct processor queue.
  * @qid: The AP queue number
- * @queue_depth: Pointer to queue depth value
- * @device_type: Pointer to device type value
+ * @info: Pointer to queue descriptor
  *
  * Returns AP queue status structure.
  */
 static inline struct ap_queue_status
-ap_test_queue(ap_qid_t qid, int *queue_depth, int *device_type)
+ap_test_queue(ap_qid_t qid, unsigned long *info)
 {
 	register unsigned long reg0 asm ("0") = qid;
 	register struct ap_queue_status reg1 asm ("1");
 	register unsigned long reg2 asm ("2") = 0UL;
 
+	if (test_facility(15))
+		reg0 |= 1UL << 23;		/* set APFT T bit*/
 	asm volatile(".long 0xb2af0000"		/* PQAP(TAPQ) */
 		     : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc");
-	*device_type = (int) (reg2 >> 24);
-	*queue_depth = (int) (reg2 & 0xff);
+	if (info)
+		*info = reg2;
 	return reg1;
 }
 
 /**
- * ap_query_facilities(): PQAP(TAPQ) query facilities.
- * @qid: The AP queue number
- *
- * Returns content of general register 2 after the PQAP(TAPQ)
- * instruction was called.
- */
-static inline unsigned long ap_query_facilities(ap_qid_t qid)
-{
-	register unsigned long reg0 asm ("0") = qid | 0x00800000UL;
-	register unsigned long reg1 asm ("1");
-	register unsigned long reg2 asm ("2") = 0UL;
-
-	asm volatile(".long 0xb2af0000"  /* PQAP(TAPQ) */
-		     : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc");
-	return reg2;
-}
-
-/**
  * ap_reset_queue(): Reset adjunct processor queue.
  * @qid: The AP queue number
  *
@@ -259,31 +231,19 @@
 	return reg1_out;
 }
 
-static inline struct ap_queue_status
-__ap_query_functions(ap_qid_t qid, unsigned int *functions)
-{
-	register unsigned long reg0 asm ("0") = 0UL | qid | (1UL << 23);
-	register struct ap_queue_status reg1 asm ("1") = AP_QUEUE_STATUS_INVALID;
-	register unsigned long reg2 asm ("2");
-
-	asm volatile(
-		".long 0xb2af0000\n"		/* PQAP(TAPQ) */
-		"0:\n"
-		EX_TABLE(0b, 0b)
-		: "+d" (reg0), "+d" (reg1), "=d" (reg2)
-		:
-		: "cc");
-
-	*functions = (unsigned int)(reg2 >> 32);
-	return reg1;
-}
-
-static inline int __ap_query_configuration(struct ap_config_info *config)
+/**
+ * ap_query_configuration(): Get AP configuration data
+ *
+ * Returns 0 on success, or -EOPNOTSUPP.
+ */
+static inline int ap_query_configuration(void)
 {
 	register unsigned long reg0 asm ("0") = 0x04000000UL;
 	register unsigned long reg1 asm ("1") = -EINVAL;
-	register unsigned char *reg2 asm ("2") = (unsigned char *)config;
+	register void *reg2 asm ("2") = (void *) ap_configuration;
 
+	if (!ap_configuration)
+		return -EOPNOTSUPP;
 	asm volatile(
 		".long 0xb2af0000\n"		/* PQAP(QCI) */
 		"0: la    %1,0\n"
@@ -297,41 +257,62 @@
 }
 
 /**
- * ap_query_functions(): Query supported functions.
- * @qid: The AP queue number
- * @functions: Pointer to functions field.
- *
- * Returns
- *   0	     on success.
- *   -ENODEV  if queue not valid.
- *   -EBUSY   if device busy.
- *   -EINVAL  if query function is not supported
+ * ap_init_configuration(): Allocate and query configuration array.
  */
-static int ap_query_functions(ap_qid_t qid, unsigned int *functions)
+static void ap_init_configuration(void)
 {
-	struct ap_queue_status status;
+	if (!ap_configuration_available())
+		return;
 
-	status = __ap_query_functions(qid, functions);
-
-	if (ap_queue_status_invalid_test(&status))
-		return -ENODEV;
-
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		return 0;
-	case AP_RESPONSE_Q_NOT_AVAIL:
-	case AP_RESPONSE_DECONFIGURED:
-	case AP_RESPONSE_CHECKSTOPPED:
-	case AP_RESPONSE_INVALID_ADDRESS:
-		return -ENODEV;
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-	case AP_RESPONSE_BUSY:
-	case AP_RESPONSE_OTHERWISE_CHANGED:
-	default:
-		return -EBUSY;
+	ap_configuration = kzalloc(sizeof(*ap_configuration), GFP_KERNEL);
+	if (!ap_configuration)
+		return;
+	if (ap_query_configuration() != 0) {
+		kfree(ap_configuration);
+		ap_configuration = NULL;
+		return;
 	}
 }
 
+/*
+ * ap_test_config(): helper function to extract the nrth bit
+ *		     within the unsigned int array field.
+ */
+static inline int ap_test_config(unsigned int *field, unsigned int nr)
+{
+	return ap_test_bit((field + (nr >> 5)), (nr & 0x1f));
+}
+
+/*
+ * ap_test_config_card_id(): Test, whether an AP card ID is configured.
+ * @id AP card ID
+ *
+ * Returns 0 if the card is not configured
+ *	   1 if the card is configured or
+ *	     if the configuration information is not available
+ */
+static inline int ap_test_config_card_id(unsigned int id)
+{
+	if (!ap_configuration)	/* QCI not supported */
+		return 1;
+	return ap_test_config(ap_configuration->apm, id);
+}
+
+/*
+ * ap_test_config_domain(): Test, whether an AP usage domain is configured.
+ * @domain AP usage domain ID
+ *
+ * Returns 0 if the usage domain is not configured
+ *	   1 if the usage domain is configured or
+ *	     if the configuration information is not available
+ */
+static inline int ap_test_config_domain(unsigned int domain)
+{
+	if (!ap_configuration)	/* QCI not supported */
+		return domain < 16;
+	return ap_test_config(ap_configuration->aqm, domain);
+}
+
 /**
  * ap_queue_enable_interruption(): Enable interruption on an AP.
  * @qid: The AP queue number
@@ -354,7 +335,9 @@
 	case AP_RESPONSE_DECONFIGURED:
 	case AP_RESPONSE_CHECKSTOPPED:
 	case AP_RESPONSE_INVALID_ADDRESS:
-		return -ENODEV;
+		pr_err("Registering adapter interrupts for AP %d failed\n",
+		       AP_QID_DEVICE(ap_dev->qid));
+		return -EOPNOTSUPP;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 	case AP_RESPONSE_BUSY:
 	default:
@@ -480,52 +463,32 @@
 EXPORT_SYMBOL(ap_recv);
 
 /**
- * __ap_schedule_poll_timer(): Schedule poll timer.
- *
- * Set up the timer to run the poll tasklet
- */
-static inline void __ap_schedule_poll_timer(void)
-{
-	ktime_t hr_time;
-
-	spin_lock_bh(&ap_poll_timer_lock);
-	if (!hrtimer_is_queued(&ap_poll_timer) && !ap_suspend_flag) {
-		hr_time = ktime_set(0, poll_timeout);
-		hrtimer_forward_now(&ap_poll_timer, hr_time);
-		hrtimer_restart(&ap_poll_timer);
-	}
-	spin_unlock_bh(&ap_poll_timer_lock);
-}
-
-/**
- * ap_schedule_poll_timer(): Schedule poll timer.
- *
- * Set up the timer to run the poll tasklet
- */
-static inline void ap_schedule_poll_timer(void)
-{
-	if (ap_using_interrupts())
-		return;
-	__ap_schedule_poll_timer();
-}
-
-
-/**
  * ap_query_queue(): Check if an AP queue is available.
  * @qid: The AP queue number
  * @queue_depth: Pointer to queue depth value
  * @device_type: Pointer to device type value
+ * @facilities: Pointer to facility indicator
  */
-static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type)
+static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type,
+			  unsigned int *facilities)
 {
 	struct ap_queue_status status;
-	int t_depth, t_device_type;
+	unsigned long info;
+	int nd;
 
-	status = ap_test_queue(qid, &t_depth, &t_device_type);
+	if (!ap_test_config_card_id(AP_QID_DEVICE(qid)))
+		return -ENODEV;
+
+	status = ap_test_queue(qid, &info);
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
-		*queue_depth = t_depth + 1;
-		*device_type = t_device_type;
+		*queue_depth = (int)(info & 0xff);
+		*device_type = (int)((info >> 24) & 0xff);
+		*facilities = (unsigned int)(info >> 32);
+		/* Update maximum domain id */
+		nd = (info >> 16) & 0xff;
+		if ((info & (1UL << 57)) && nd > 0)
+			ap_max_domain_id = nd;
 		return 0;
 	case AP_RESPONSE_Q_NOT_AVAIL:
 	case AP_RESPONSE_DECONFIGURED:
@@ -541,75 +504,518 @@
 	}
 }
 
+/* State machine definitions and helpers */
+
+static void ap_sm_wait(enum ap_wait wait)
+{
+	ktime_t hr_time;
+
+	switch (wait) {
+	case AP_WAIT_AGAIN:
+	case AP_WAIT_INTERRUPT:
+		if (ap_using_interrupts())
+			break;
+		if (ap_poll_kthread) {
+			wake_up(&ap_poll_wait);
+			break;
+		}
+		/* Fall through */
+	case AP_WAIT_TIMEOUT:
+		spin_lock_bh(&ap_poll_timer_lock);
+		if (!hrtimer_is_queued(&ap_poll_timer)) {
+			hr_time = ktime_set(0, poll_timeout);
+			hrtimer_forward_now(&ap_poll_timer, hr_time);
+			hrtimer_restart(&ap_poll_timer);
+		}
+		spin_unlock_bh(&ap_poll_timer_lock);
+		break;
+	case AP_WAIT_NONE:
+	default:
+		break;
+	}
+}
+
+static enum ap_wait ap_sm_nop(struct ap_device *ap_dev)
+{
+	return AP_WAIT_NONE;
+}
+
 /**
- * ap_init_queue(): Reset an AP queue.
+ * ap_sm_recv(): Receive pending reply messages from an AP device but do
+ *	not change the state of the device.
+ * @ap_dev: pointer to the AP device
+ *
+ * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ */
+static struct ap_queue_status ap_sm_recv(struct ap_device *ap_dev)
+{
+	struct ap_queue_status status;
+	struct ap_message *ap_msg;
+
+	status = __ap_recv(ap_dev->qid, &ap_dev->reply->psmid,
+			   ap_dev->reply->message, ap_dev->reply->length);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		atomic_dec(&ap_poll_requests);
+		ap_dev->queue_count--;
+		if (ap_dev->queue_count > 0)
+			mod_timer(&ap_dev->timeout,
+				  jiffies + ap_dev->drv->request_timeout);
+		list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
+			if (ap_msg->psmid != ap_dev->reply->psmid)
+				continue;
+			list_del_init(&ap_msg->list);
+			ap_dev->pendingq_count--;
+			ap_msg->receive(ap_dev, ap_msg, ap_dev->reply);
+			break;
+		}
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (!status.queue_empty || ap_dev->queue_count <= 0)
+			break;
+		/* The card shouldn't forget requests but who knows. */
+		atomic_sub(ap_dev->queue_count, &ap_poll_requests);
+		ap_dev->queue_count = 0;
+		list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
+		ap_dev->requestq_count += ap_dev->pendingq_count;
+		ap_dev->pendingq_count = 0;
+		break;
+	default:
+		break;
+	}
+	return status;
+}
+
+/**
+ * ap_sm_read(): Receive pending reply messages from an AP device.
+ * @ap_dev: pointer to the AP device
+ *
+ * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ */
+static enum ap_wait ap_sm_read(struct ap_device *ap_dev)
+{
+	struct ap_queue_status status;
+
+	status = ap_sm_recv(ap_dev);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		if (ap_dev->queue_count > 0)
+			return AP_WAIT_AGAIN;
+		ap_dev->state = AP_STATE_IDLE;
+		return AP_WAIT_NONE;
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		if (ap_dev->queue_count > 0)
+			return AP_WAIT_INTERRUPT;
+		ap_dev->state = AP_STATE_IDLE;
+		return AP_WAIT_NONE;
+	default:
+		ap_dev->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
+}
+
+/**
+ * ap_sm_write(): Send messages from the request queue to an AP device.
+ * @ap_dev: pointer to the AP device
+ *
+ * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ */
+static enum ap_wait ap_sm_write(struct ap_device *ap_dev)
+{
+	struct ap_queue_status status;
+	struct ap_message *ap_msg;
+
+	if (ap_dev->requestq_count <= 0)
+		return AP_WAIT_NONE;
+	/* Start the next request on the queue. */
+	ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list);
+	status = __ap_send(ap_dev->qid, ap_msg->psmid,
+			   ap_msg->message, ap_msg->length, ap_msg->special);
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		atomic_inc(&ap_poll_requests);
+		ap_dev->queue_count++;
+		if (ap_dev->queue_count == 1)
+			mod_timer(&ap_dev->timeout,
+				  jiffies + ap_dev->drv->request_timeout);
+		list_move_tail(&ap_msg->list, &ap_dev->pendingq);
+		ap_dev->requestq_count--;
+		ap_dev->pendingq_count++;
+		if (ap_dev->queue_count < ap_dev->queue_depth) {
+			ap_dev->state = AP_STATE_WORKING;
+			return AP_WAIT_AGAIN;
+		}
+		/* fall through */
+	case AP_RESPONSE_Q_FULL:
+		ap_dev->state = AP_STATE_QUEUE_FULL;
+		return AP_WAIT_INTERRUPT;
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		ap_dev->state = AP_STATE_RESET_WAIT;
+		return AP_WAIT_TIMEOUT;
+	case AP_RESPONSE_MESSAGE_TOO_BIG:
+	case AP_RESPONSE_REQ_FAC_NOT_INST:
+		list_del_init(&ap_msg->list);
+		ap_dev->requestq_count--;
+		ap_msg->rc = -EINVAL;
+		ap_msg->receive(ap_dev, ap_msg, NULL);
+		return AP_WAIT_AGAIN;
+	default:
+		ap_dev->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
+}
+
+/**
+ * ap_sm_read_write(): Send and receive messages to/from an AP device.
+ * @ap_dev: pointer to the AP device
+ *
+ * Returns AP_WAIT_NONE, AP_WAIT_AGAIN, or AP_WAIT_INTERRUPT
+ */
+static enum ap_wait ap_sm_read_write(struct ap_device *ap_dev)
+{
+	return min(ap_sm_read(ap_dev), ap_sm_write(ap_dev));
+}
+
+/**
+ * ap_sm_reset(): Reset an AP queue.
  * @qid: The AP queue number
  *
  * Submit the Reset command to an AP queue.
- * Since the reset is asynchron set the state to 'RESET_IN_PROGRESS'
- * and check later via ap_poll_queue() if the reset is done.
  */
-static int ap_init_queue(struct ap_device *ap_dev)
+static enum ap_wait ap_sm_reset(struct ap_device *ap_dev)
 {
 	struct ap_queue_status status;
 
 	status = ap_reset_queue(ap_dev->qid);
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
-		ap_dev->interrupt = AP_INTR_DISABLED;
-		ap_dev->reset = AP_RESET_IN_PROGRESS;
-		return 0;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
+		ap_dev->state = AP_STATE_RESET_WAIT;
+		ap_dev->interrupt = AP_INTR_DISABLED;
+		return AP_WAIT_TIMEOUT;
 	case AP_RESPONSE_BUSY:
-		return -EBUSY;
+		return AP_WAIT_TIMEOUT;
 	case AP_RESPONSE_Q_NOT_AVAIL:
 	case AP_RESPONSE_DECONFIGURED:
 	case AP_RESPONSE_CHECKSTOPPED:
 	default:
-		return -ENODEV;
+		ap_dev->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
 	}
 }
 
 /**
- * ap_increase_queue_count(): Arm request timeout.
- * @ap_dev: Pointer to an AP device.
+ * ap_sm_reset_wait(): Test queue for completion of the reset operation
+ * @ap_dev: pointer to the AP device
  *
- * Arm request timeout if an AP device was idle and a new request is submitted.
+ * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0.
  */
-static void ap_increase_queue_count(struct ap_device *ap_dev)
+static enum ap_wait ap_sm_reset_wait(struct ap_device *ap_dev)
 {
-	int timeout = ap_dev->drv->request_timeout;
+	struct ap_queue_status status;
+	unsigned long info;
 
-	ap_dev->queue_count++;
-	if (ap_dev->queue_count == 1) {
-		mod_timer(&ap_dev->timeout, jiffies + timeout);
-		ap_dev->reset = AP_RESET_ARMED;
-	}
-}
-
-/**
- * ap_decrease_queue_count(): Decrease queue count.
- * @ap_dev: Pointer to an AP device.
- *
- * If AP device is still alive, re-schedule request timeout if there are still
- * pending requests.
- */
-static void ap_decrease_queue_count(struct ap_device *ap_dev)
-{
-	int timeout = ap_dev->drv->request_timeout;
-
-	ap_dev->queue_count--;
 	if (ap_dev->queue_count > 0)
-		mod_timer(&ap_dev->timeout, jiffies + timeout);
+		/* Try to read a completed message and get the status */
+		status = ap_sm_recv(ap_dev);
 	else
-		/*
-		 * The timeout timer should to be disabled now - since
-		 * del_timer_sync() is very expensive, we just tell via the
-		 * reset flag to ignore the pending timeout timer.
-		 */
-		ap_dev->reset = AP_RESET_IGNORE;
+		/* Get the status with TAPQ */
+		status = ap_test_queue(ap_dev->qid, &info);
+
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		if (ap_using_interrupts() &&
+		    ap_queue_enable_interruption(ap_dev,
+						 ap_airq.lsi_ptr) == 0)
+			ap_dev->state = AP_STATE_SETIRQ_WAIT;
+		else
+			ap_dev->state = (ap_dev->queue_count > 0) ?
+				AP_STATE_WORKING : AP_STATE_IDLE;
+		return AP_WAIT_AGAIN;
+	case AP_RESPONSE_BUSY:
+	case AP_RESPONSE_RESET_IN_PROGRESS:
+		return AP_WAIT_TIMEOUT;
+	case AP_RESPONSE_Q_NOT_AVAIL:
+	case AP_RESPONSE_DECONFIGURED:
+	case AP_RESPONSE_CHECKSTOPPED:
+	default:
+		ap_dev->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
 }
 
+/**
+ * ap_sm_setirq_wait(): Test queue for completion of the irq enablement
+ * @ap_dev: pointer to the AP device
+ *
+ * Returns AP_POLL_IMMEDIATELY, AP_POLL_AFTER_TIMEROUT or 0.
+ */
+static enum ap_wait ap_sm_setirq_wait(struct ap_device *ap_dev)
+{
+	struct ap_queue_status status;
+	unsigned long info;
+
+	if (ap_dev->queue_count > 0)
+		/* Try to read a completed message and get the status */
+		status = ap_sm_recv(ap_dev);
+	else
+		/* Get the status with TAPQ */
+		status = ap_test_queue(ap_dev->qid, &info);
+
+	if (status.int_enabled == 1) {
+		/* Irqs are now enabled */
+		ap_dev->interrupt = AP_INTR_ENABLED;
+		ap_dev->state = (ap_dev->queue_count > 0) ?
+			AP_STATE_WORKING : AP_STATE_IDLE;
+	}
+
+	switch (status.response_code) {
+	case AP_RESPONSE_NORMAL:
+		if (ap_dev->queue_count > 0)
+			return AP_WAIT_AGAIN;
+		/* fallthrough */
+	case AP_RESPONSE_NO_PENDING_REPLY:
+		return AP_WAIT_TIMEOUT;
+	default:
+		ap_dev->state = AP_STATE_BORKED;
+		return AP_WAIT_NONE;
+	}
+}
+
+/*
+ * AP state machine jump table
+ */
+ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = {
+	[AP_STATE_RESET_START] = {
+		[AP_EVENT_POLL] = ap_sm_reset,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_RESET_WAIT] = {
+		[AP_EVENT_POLL] = ap_sm_reset_wait,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_SETIRQ_WAIT] = {
+		[AP_EVENT_POLL] = ap_sm_setirq_wait,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_IDLE] = {
+		[AP_EVENT_POLL] = ap_sm_write,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_WORKING] = {
+		[AP_EVENT_POLL] = ap_sm_read_write,
+		[AP_EVENT_TIMEOUT] = ap_sm_reset,
+	},
+	[AP_STATE_QUEUE_FULL] = {
+		[AP_EVENT_POLL] = ap_sm_read,
+		[AP_EVENT_TIMEOUT] = ap_sm_reset,
+	},
+	[AP_STATE_SUSPEND_WAIT] = {
+		[AP_EVENT_POLL] = ap_sm_read,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+	[AP_STATE_BORKED] = {
+		[AP_EVENT_POLL] = ap_sm_nop,
+		[AP_EVENT_TIMEOUT] = ap_sm_nop,
+	},
+};
+
+static inline enum ap_wait ap_sm_event(struct ap_device *ap_dev,
+				       enum ap_event event)
+{
+	return ap_jumptable[ap_dev->state][event](ap_dev);
+}
+
+static inline enum ap_wait ap_sm_event_loop(struct ap_device *ap_dev,
+					    enum ap_event event)
+{
+	enum ap_wait wait;
+
+	while ((wait = ap_sm_event(ap_dev, event)) == AP_WAIT_AGAIN)
+		;
+	return wait;
+}
+
+/**
+ * ap_request_timeout(): Handling of request timeouts
+ * @data: Holds the AP device.
+ *
+ * Handles request timeouts.
+ */
+static void ap_request_timeout(unsigned long data)
+{
+	struct ap_device *ap_dev = (struct ap_device *) data;
+
+	if (ap_suspend_flag)
+		return;
+	spin_lock_bh(&ap_dev->lock);
+	ap_sm_wait(ap_sm_event(ap_dev, AP_EVENT_TIMEOUT));
+	spin_unlock_bh(&ap_dev->lock);
+}
+
+/**
+ * ap_poll_timeout(): AP receive polling for finished AP requests.
+ * @unused: Unused pointer.
+ *
+ * Schedules the AP tasklet using a high resolution timer.
+ */
+static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused)
+{
+	if (!ap_suspend_flag)
+		tasklet_schedule(&ap_tasklet);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * ap_interrupt_handler() - Schedule ap_tasklet on interrupt
+ * @airq: pointer to adapter interrupt descriptor
+ */
+static void ap_interrupt_handler(struct airq_struct *airq)
+{
+	inc_irq_stat(IRQIO_APB);
+	if (!ap_suspend_flag)
+		tasklet_schedule(&ap_tasklet);
+}
+
+/**
+ * ap_tasklet_fn(): Tasklet to poll all AP devices.
+ * @dummy: Unused variable
+ *
+ * Poll all AP devices on the bus.
+ */
+static void ap_tasklet_fn(unsigned long dummy)
+{
+	struct ap_device *ap_dev;
+	enum ap_wait wait = AP_WAIT_NONE;
+
+	/* Reset the indicator if interrupts are used. Thus new interrupts can
+	 * be received. Doing it in the beginning of the tasklet is therefor
+	 * important that no requests on any AP get lost.
+	 */
+	if (ap_using_interrupts())
+		xchg(ap_airq.lsi_ptr, 0);
+
+	spin_lock(&ap_device_list_lock);
+	list_for_each_entry(ap_dev, &ap_device_list, list) {
+		spin_lock_bh(&ap_dev->lock);
+		wait = min(wait, ap_sm_event_loop(ap_dev, AP_EVENT_POLL));
+		spin_unlock_bh(&ap_dev->lock);
+	}
+	spin_unlock(&ap_device_list_lock);
+	ap_sm_wait(wait);
+}
+
+/**
+ * ap_poll_thread(): Thread that polls for finished requests.
+ * @data: Unused pointer
+ *
+ * AP bus poll thread. The purpose of this thread is to poll for
+ * finished requests in a loop if there is a "free" cpu - that is
+ * a cpu that doesn't have anything better to do. The polling stops
+ * as soon as there is another task or if all messages have been
+ * delivered.
+ */
+static int ap_poll_thread(void *data)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	set_user_nice(current, MAX_NICE);
+	set_freezable();
+	while (!kthread_should_stop()) {
+		add_wait_queue(&ap_poll_wait, &wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (ap_suspend_flag ||
+		    atomic_read(&ap_poll_requests) <= 0) {
+			schedule();
+			try_to_freeze();
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&ap_poll_wait, &wait);
+		if (need_resched()) {
+			schedule();
+			try_to_freeze();
+			continue;
+		}
+		ap_tasklet_fn(0);
+	} while (!kthread_should_stop());
+	return 0;
+}
+
+static int ap_poll_thread_start(void)
+{
+	int rc;
+
+	if (ap_using_interrupts() || ap_poll_kthread)
+		return 0;
+	mutex_lock(&ap_poll_thread_mutex);
+	ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll");
+	rc = PTR_RET(ap_poll_kthread);
+	if (rc)
+		ap_poll_kthread = NULL;
+	mutex_unlock(&ap_poll_thread_mutex);
+	return rc;
+}
+
+static void ap_poll_thread_stop(void)
+{
+	if (!ap_poll_kthread)
+		return;
+	mutex_lock(&ap_poll_thread_mutex);
+	kthread_stop(ap_poll_kthread);
+	ap_poll_kthread = NULL;
+	mutex_unlock(&ap_poll_thread_mutex);
+}
+
+/**
+ * ap_queue_message(): Queue a request to an AP device.
+ * @ap_dev: The AP device to queue the message to
+ * @ap_msg: The message that is to be added
+ */
+void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
+{
+	/* For asynchronous message handling a valid receive-callback
+	 * is required. */
+	BUG_ON(!ap_msg->receive);
+
+	spin_lock_bh(&ap_dev->lock);
+	/* Queue the message. */
+	list_add_tail(&ap_msg->list, &ap_dev->requestq);
+	ap_dev->requestq_count++;
+	ap_dev->total_request_count++;
+	/* Send/receive as many request from the queue as possible. */
+	ap_sm_wait(ap_sm_event_loop(ap_dev, AP_EVENT_POLL));
+	spin_unlock_bh(&ap_dev->lock);
+}
+EXPORT_SYMBOL(ap_queue_message);
+
+/**
+ * ap_cancel_message(): Cancel a crypto request.
+ * @ap_dev: The AP device that has the message queued
+ * @ap_msg: The message that is to be removed
+ *
+ * Cancel a crypto request. This is done by removing the request
+ * from the device pending or request queue. Note that the
+ * request stays on the AP queue. When it finishes the message
+ * reply will be discarded because the psmid can't be found.
+ */
+void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
+{
+	struct ap_message *tmp;
+
+	spin_lock_bh(&ap_dev->lock);
+	if (!list_empty(&ap_msg->list)) {
+		list_for_each_entry(tmp, &ap_dev->pendingq, list)
+			if (tmp->psmid == ap_msg->psmid) {
+				ap_dev->pendingq_count--;
+				goto found;
+			}
+		ap_dev->requestq_count--;
+found:
+		list_del_init(&ap_msg->list);
+	}
+	spin_unlock_bh(&ap_dev->lock);
+}
+EXPORT_SYMBOL(ap_cancel_message);
+
 /*
  * AP device related attributes.
  */
@@ -690,21 +1096,17 @@
 	int rc = 0;
 
 	spin_lock_bh(&ap_dev->lock);
-	switch (ap_dev->reset) {
-	case AP_RESET_IGNORE:
-		rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n");
-		break;
-	case AP_RESET_ARMED:
-		rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n");
-		break;
-	case AP_RESET_DO:
-		rc = snprintf(buf, PAGE_SIZE, "Reset Timer expired.\n");
-		break;
-	case AP_RESET_IN_PROGRESS:
+	switch (ap_dev->state) {
+	case AP_STATE_RESET_START:
+	case AP_STATE_RESET_WAIT:
 		rc = snprintf(buf, PAGE_SIZE, "Reset in progress.\n");
 		break;
-	default:
+	case AP_STATE_WORKING:
+	case AP_STATE_QUEUE_FULL:
+		rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n");
 		break;
+	default:
+		rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n");
 	}
 	spin_unlock_bh(&ap_dev->lock);
 	return rc;
@@ -719,17 +1121,12 @@
 	int rc = 0;
 
 	spin_lock_bh(&ap_dev->lock);
-	switch (ap_dev->interrupt) {
-	case AP_INTR_DISABLED:
-		rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
-		break;
-	case AP_INTR_ENABLED:
-		rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
-		break;
-	case AP_INTR_IN_PROGRESS:
+	if (ap_dev->state == AP_STATE_SETIRQ_WAIT)
 		rc = snprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n");
-		break;
-	}
+	else if (ap_dev->interrupt == AP_INTR_ENABLED)
+		rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n");
+	else
+		rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n");
 	spin_unlock_bh(&ap_dev->lock);
 	return rc;
 }
@@ -823,99 +1220,95 @@
 	return retval;
 }
 
-static int ap_bus_suspend(struct device *dev, pm_message_t state)
+static int ap_dev_suspend(struct device *dev, pm_message_t state)
 {
 	struct ap_device *ap_dev = to_ap_dev(dev);
-	unsigned long flags;
 
-	if (!ap_suspend_flag) {
-		ap_suspend_flag = 1;
-
-		/* Disable scanning for devices, thus we do not want to scan
-		 * for them after removing.
-		 */
-		del_timer_sync(&ap_config_timer);
-		if (ap_work_queue != NULL) {
-			destroy_workqueue(ap_work_queue);
-			ap_work_queue = NULL;
-		}
-
-		tasklet_disable(&ap_tasklet);
-	}
 	/* Poll on the device until all requests are finished. */
-	do {
-		flags = 0;
-		spin_lock_bh(&ap_dev->lock);
-		__ap_poll_device(ap_dev, &flags);
-		spin_unlock_bh(&ap_dev->lock);
-	} while ((flags & 1) || (flags & 2));
-
 	spin_lock_bh(&ap_dev->lock);
-	ap_dev->unregistered = 1;
+	ap_dev->state = AP_STATE_SUSPEND_WAIT;
+	while (ap_sm_event(ap_dev, AP_EVENT_POLL) != AP_WAIT_NONE)
+		;
+	ap_dev->state = AP_STATE_BORKED;
 	spin_unlock_bh(&ap_dev->lock);
-
 	return 0;
 }
 
-static int ap_bus_resume(struct device *dev)
+static int ap_dev_resume(struct device *dev)
 {
-	struct ap_device *ap_dev = to_ap_dev(dev);
+	return 0;
+}
+
+static void ap_bus_suspend(void)
+{
+	ap_suspend_flag = 1;
+	/*
+	 * Disable scanning for devices, thus we do not want to scan
+	 * for them after removing.
+	 */
+	flush_work(&ap_scan_work);
+	tasklet_disable(&ap_tasklet);
+}
+
+static int __ap_devices_unregister(struct device *dev, void *dummy)
+{
+	device_unregister(dev);
+	return 0;
+}
+
+static void ap_bus_resume(void)
+{
 	int rc;
 
-	if (ap_suspend_flag) {
-		ap_suspend_flag = 0;
-		if (ap_interrupts_available()) {
-			if (!ap_using_interrupts()) {
-				rc = register_adapter_interrupt(&ap_airq);
-				ap_airq_flag = (rc == 0);
-			}
-		} else {
-			if (ap_using_interrupts()) {
-				unregister_adapter_interrupt(&ap_airq);
-				ap_airq_flag = 0;
-			}
-		}
-		ap_query_configuration();
-		if (!user_set_domain) {
-			ap_domain_index = -1;
-			ap_select_domain();
-		}
-		init_timer(&ap_config_timer);
-		ap_config_timer.function = ap_config_timeout;
-		ap_config_timer.data = 0;
-		ap_config_timer.expires = jiffies + ap_config_time * HZ;
-		add_timer(&ap_config_timer);
-		ap_work_queue = create_singlethread_workqueue("kapwork");
-		if (!ap_work_queue)
-			return -ENOMEM;
-		tasklet_enable(&ap_tasklet);
-		if (!ap_using_interrupts())
-			ap_schedule_poll_timer();
-		else
-			tasklet_schedule(&ap_tasklet);
-		if (ap_thread_flag)
-			rc = ap_poll_thread_start();
-		else
-			rc = 0;
-	} else
-		rc = 0;
-	if (AP_QID_QUEUE(ap_dev->qid) != ap_domain_index) {
-		spin_lock_bh(&ap_dev->lock);
-		ap_dev->qid = AP_MKQID(AP_QID_DEVICE(ap_dev->qid),
-				       ap_domain_index);
-		spin_unlock_bh(&ap_dev->lock);
+	/* Unconditionally remove all AP devices */
+	bus_for_each_dev(&ap_bus_type, NULL, NULL, __ap_devices_unregister);
+	/* Reset thin interrupt setting */
+	if (ap_interrupts_available() && !ap_using_interrupts()) {
+		rc = register_adapter_interrupt(&ap_airq);
+		ap_airq_flag = (rc == 0);
 	}
-	queue_work(ap_work_queue, &ap_config_work);
-
-	return rc;
+	if (!ap_interrupts_available() && ap_using_interrupts()) {
+		unregister_adapter_interrupt(&ap_airq);
+		ap_airq_flag = 0;
+	}
+	/* Reset domain */
+	if (!user_set_domain)
+		ap_domain_index = -1;
+	/* Get things going again */
+	ap_suspend_flag = 0;
+	if (ap_airq_flag)
+		xchg(ap_airq.lsi_ptr, 0);
+	tasklet_enable(&ap_tasklet);
+	queue_work(system_long_wq, &ap_scan_work);
 }
 
+static int ap_power_event(struct notifier_block *this, unsigned long event,
+			  void *ptr)
+{
+	switch (event) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		ap_bus_suspend();
+		break;
+	case PM_POST_HIBERNATION:
+	case PM_POST_SUSPEND:
+		ap_bus_resume();
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+static struct notifier_block ap_power_notifier = {
+	.notifier_call = ap_power_event,
+};
+
 static struct bus_type ap_bus_type = {
 	.name = "ap",
 	.match = &ap_bus_match,
 	.uevent = &ap_uevent,
-	.suspend = ap_bus_suspend,
-	.resume = ap_bus_resume
+	.suspend = ap_dev_suspend,
+	.resume = ap_dev_resume,
 };
 
 static int ap_device_probe(struct device *dev)
@@ -925,21 +1318,9 @@
 	int rc;
 
 	ap_dev->drv = ap_drv;
-
-	spin_lock_bh(&ap_device_list_lock);
-	list_add(&ap_dev->list, &ap_device_list);
-	spin_unlock_bh(&ap_device_list_lock);
-
 	rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV;
-	if (rc) {
-		spin_lock_bh(&ap_device_list_lock);
-		list_del_init(&ap_dev->list);
-		spin_unlock_bh(&ap_device_list_lock);
-	} else {
-		if (ap_dev->reset == AP_RESET_IN_PROGRESS ||
-			ap_dev->interrupt == AP_INTR_IN_PROGRESS)
-			__ap_schedule_poll_timer();
-	}
+	if (rc)
+		ap_dev->drv = NULL;
 	return rc;
 }
 
@@ -956,12 +1337,14 @@
 	list_for_each_entry_safe(ap_msg, next, &ap_dev->pendingq, list) {
 		list_del_init(&ap_msg->list);
 		ap_dev->pendingq_count--;
-		ap_msg->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
+		ap_msg->rc = -EAGAIN;
+		ap_msg->receive(ap_dev, ap_msg, NULL);
 	}
 	list_for_each_entry_safe(ap_msg, next, &ap_dev->requestq, list) {
 		list_del_init(&ap_msg->list);
 		ap_dev->requestq_count--;
-		ap_msg->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
+		ap_msg->rc = -EAGAIN;
+		ap_msg->receive(ap_dev, ap_msg, NULL);
 	}
 }
 
@@ -991,6 +1374,11 @@
 	return 0;
 }
 
+static void ap_device_release(struct device *dev)
+{
+	kfree(to_ap_dev(dev));
+}
+
 int ap_driver_register(struct ap_driver *ap_drv, struct module *owner,
 		       char *name)
 {
@@ -1013,60 +1401,16 @@
 
 void ap_bus_force_rescan(void)
 {
-	/* reconfigure the AP bus rescan timer. */
-	mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
+	if (ap_suspend_flag)
+		return;
 	/* processing a asynchronous bus rescan */
-	queue_work(ap_work_queue, &ap_config_work);
-	flush_work(&ap_config_work);
+	del_timer(&ap_config_timer);
+	queue_work(system_long_wq, &ap_scan_work);
+	flush_work(&ap_scan_work);
 }
 EXPORT_SYMBOL(ap_bus_force_rescan);
 
 /*
- * ap_test_config(): helper function to extract the nrth bit
- *		     within the unsigned int array field.
- */
-static inline int ap_test_config(unsigned int *field, unsigned int nr)
-{
-	if (nr > 0xFFu)
-		return 0;
-	return ap_test_bit((field + (nr >> 5)), (nr & 0x1f));
-}
-
-/*
- * ap_test_config_card_id(): Test, whether an AP card ID is configured.
- * @id AP card ID
- *
- * Returns 0 if the card is not configured
- *	   1 if the card is configured or
- *	     if the configuration information is not available
- */
-static inline int ap_test_config_card_id(unsigned int id)
-{
-	if (!ap_configuration)
-		return 1;
-	return ap_test_config(ap_configuration->apm, id);
-}
-
-/*
- * ap_test_config_domain(): Test, whether an AP usage domain is configured.
- * @domain AP usage domain ID
- *
- * Returns 0 if the usage domain is not configured
- *	   1 if the usage domain is configured or
- *	     if the configuration information is not available
- */
-static inline int ap_test_config_domain(unsigned int domain)
-{
-	if (!ap_configuration)	  /* QCI not supported */
-		if (domain < 16)
-			return 1; /* then domains 0...15 are configured */
-		else
-			return 0;
-	else
-		return ap_test_config(ap_configuration->aqm, domain);
-}
-
-/*
  * AP bus attributes.
  */
 static ssize_t ap_domain_show(struct bus_type *bus, char *buf)
@@ -1078,21 +1422,20 @@
 
 static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf)
 {
-	if (ap_configuration != NULL) { /* QCI not supported */
-		if (test_facility(76)) { /* format 1 - 256 bit domain field */
-			return snprintf(buf, PAGE_SIZE,
-				"0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
+	if (!ap_configuration)	/* QCI not supported */
+		return snprintf(buf, PAGE_SIZE, "not supported\n");
+	if (!test_facility(76))
+		/* format 0 - 16 bit domain field */
+		return snprintf(buf, PAGE_SIZE, "%08x%08x\n",
+				ap_configuration->adm[0],
+				ap_configuration->adm[1]);
+	/* format 1 - 256 bit domain field */
+	return snprintf(buf, PAGE_SIZE,
+			"0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
 			ap_configuration->adm[0], ap_configuration->adm[1],
 			ap_configuration->adm[2], ap_configuration->adm[3],
 			ap_configuration->adm[4], ap_configuration->adm[5],
 			ap_configuration->adm[6], ap_configuration->adm[7]);
-		} else { /* format 0 - 16 bit domain field */
-			return snprintf(buf, PAGE_SIZE, "%08x%08x\n",
-			ap_configuration->adm[0], ap_configuration->adm[1]);
-		  }
-	} else {
-		return snprintf(buf, PAGE_SIZE, "not supported\n");
-	  }
 }
 
 static BUS_ATTR(ap_control_domain_mask, 0444,
@@ -1119,11 +1462,7 @@
 	if (sscanf(buf, "%d\n", &time) != 1 || time < 5 || time > 120)
 		return -EINVAL;
 	ap_config_time = time;
-	if (!timer_pending(&ap_config_timer) ||
-	    !mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ)) {
-		ap_config_timer.expires = jiffies + ap_config_time * HZ;
-		add_timer(&ap_config_timer);
-	}
+	mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
 	return count;
 }
 
@@ -1144,9 +1483,8 @@
 	if (flag) {
 		rc = ap_poll_thread_start();
 		if (rc)
-			return rc;
-	}
-	else
+			count = rc;
+	} else
 		ap_poll_thread_stop();
 	return count;
 }
@@ -1184,35 +1522,12 @@
 
 static ssize_t ap_max_domain_id_show(struct bus_type *bus, char *buf)
 {
-	ap_qid_t qid;
-	int i, nd, max_domain_id = -1;
-	unsigned long fbits;
+	int max_domain_id;
 
-	if (ap_configuration) {
-		if (ap_domain_index >= 0 && ap_domain_index < AP_DOMAINS) {
-			for (i = 0; i < AP_DEVICES; i++) {
-				if (!ap_test_config_card_id(i))
-					continue;
-				qid = AP_MKQID(i, ap_domain_index);
-				fbits = ap_query_facilities(qid);
-				if (fbits & (1UL << 57)) {
-					/* the N bit is 0, Nd field is filled */
-					nd = (int)((fbits & 0x00FF0000UL)>>16);
-					if (nd > 0)
-						max_domain_id = nd;
-					else
-						max_domain_id = 15;
-				} else {
-					/* N bit is 1, max 16 domains */
-					max_domain_id = 15;
-				}
-				break;
-			}
-		}
-	} else {
-		/* no APXA support, older machines with max 16 domains */
+	if (ap_configuration)
+		max_domain_id = ap_max_domain_id ? : -1;
+	else
 		max_domain_id = 15;
-	}
 	return snprintf(buf, PAGE_SIZE, "%d\n", max_domain_id);
 }
 
@@ -1230,44 +1545,22 @@
 };
 
 /**
- * ap_query_configuration(): Query AP configuration information.
- *
- * Query information of installed cards and configured domains from AP.
- */
-static void ap_query_configuration(void)
-{
-	if (ap_configuration_available()) {
-		if (!ap_configuration)
-			ap_configuration =
-				kzalloc(sizeof(struct ap_config_info),
-					GFP_KERNEL);
-		if (ap_configuration)
-			__ap_query_configuration(ap_configuration);
-	} else
-		ap_configuration = NULL;
-}
-
-/**
  * ap_select_domain(): Select an AP domain.
  *
  * Pick one of the 16 AP domains.
  */
 static int ap_select_domain(void)
 {
-	int queue_depth, device_type, count, max_count, best_domain;
-	ap_qid_t qid;
-	int rc, i, j;
-
-	/* IF APXA isn't installed, only 16 domains could be defined */
-	if (!ap_configuration->ap_extended && (ap_domain_index > 15))
-		return -EINVAL;
+	int count, max_count, best_domain;
+	struct ap_queue_status status;
+	int i, j;
 
 	/*
 	 * We want to use a single domain. Either the one specified with
 	 * the "domain=" parameter or the domain with the maximum number
 	 * of devices.
 	 */
-	if (ap_domain_index >= 0 && ap_domain_index < AP_DOMAINS)
+	if (ap_domain_index >= 0)
 		/* Domain has already been selected. */
 		return 0;
 	best_domain = -1;
@@ -1279,9 +1572,8 @@
 		for (j = 0; j < AP_DEVICES; j++) {
 			if (!ap_test_config_card_id(j))
 				continue;
-			qid = AP_MKQID(j, i);
-			rc = ap_query_queue(qid, &queue_depth, &device_type);
-			if (rc)
+			status = ap_test_queue(AP_MKQID(j, i), NULL);
+			if (status.response_code != AP_RESPONSE_NORMAL)
 				continue;
 			count++;
 		}
@@ -1298,109 +1590,6 @@
 }
 
 /**
- * ap_probe_device_type(): Find the device type of an AP.
- * @ap_dev: pointer to the AP device.
- *
- * Find the device type if query queue returned a device type of 0.
- */
-static int ap_probe_device_type(struct ap_device *ap_dev)
-{
-	static unsigned char msg[] = {
-		0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x01,0x00,0x43,0x43,0x41,0x2d,0x41,0x50,
-		0x50,0x4c,0x20,0x20,0x20,0x01,0x01,0x01,
-		0x00,0x00,0x00,0x00,0x50,0x4b,0x00,0x00,
-		0x00,0x00,0x01,0x1c,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x05,0xb8,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x70,0x00,0x41,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x54,0x32,0x01,0x00,0xa0,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0xb8,0x05,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-		0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,
-		0x49,0x43,0x53,0x46,0x20,0x20,0x20,0x20,
-		0x50,0x4b,0x0a,0x00,0x50,0x4b,0x43,0x53,
-		0x2d,0x31,0x2e,0x32,0x37,0x00,0x11,0x22,
-		0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,
-		0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,
-		0x99,0x00,0x11,0x22,0x33,0x44,0x55,0x66,
-		0x77,0x88,0x99,0x00,0x11,0x22,0x33,0x44,
-		0x55,0x66,0x77,0x88,0x99,0x00,0x11,0x22,
-		0x33,0x44,0x55,0x66,0x77,0x88,0x99,0x00,
-		0x11,0x22,0x33,0x5d,0x00,0x5b,0x00,0x77,
-		0x88,0x1e,0x00,0x00,0x57,0x00,0x00,0x00,
-		0x00,0x04,0x00,0x00,0x4f,0x00,0x00,0x00,
-		0x03,0x02,0x00,0x00,0x40,0x01,0x00,0x01,
-		0xce,0x02,0x68,0x2d,0x5f,0xa9,0xde,0x0c,
-		0xf6,0xd2,0x7b,0x58,0x4b,0xf9,0x28,0x68,
-		0x3d,0xb4,0xf4,0xef,0x78,0xd5,0xbe,0x66,
-		0x63,0x42,0xef,0xf8,0xfd,0xa4,0xf8,0xb0,
-		0x8e,0x29,0xc2,0xc9,0x2e,0xd8,0x45,0xb8,
-		0x53,0x8c,0x6f,0x4e,0x72,0x8f,0x6c,0x04,
-		0x9c,0x88,0xfc,0x1e,0xc5,0x83,0x55,0x57,
-		0xf7,0xdd,0xfd,0x4f,0x11,0x36,0x95,0x5d,
-	};
-	struct ap_queue_status status;
-	unsigned long long psmid;
-	char *reply;
-	int rc, i;
-
-	reply = (void *) get_zeroed_page(GFP_KERNEL);
-	if (!reply) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	status = __ap_send(ap_dev->qid, 0x0102030405060708ULL,
-			   msg, sizeof(msg), 0);
-	if (status.response_code != AP_RESPONSE_NORMAL) {
-		rc = -ENODEV;
-		goto out_free;
-	}
-
-	/* Wait for the test message to complete. */
-	for (i = 0; i < 6; i++) {
-		msleep(300);
-		status = __ap_recv(ap_dev->qid, &psmid, reply, 4096);
-		if (status.response_code == AP_RESPONSE_NORMAL &&
-		    psmid == 0x0102030405060708ULL)
-			break;
-	}
-	if (i < 6) {
-		/* Got an answer. */
-		if (reply[0] == 0x00 && reply[1] == 0x86)
-			ap_dev->device_type = AP_DEVICE_TYPE_PCICC;
-		else
-			ap_dev->device_type = AP_DEVICE_TYPE_PCICA;
-		rc = 0;
-	} else
-		rc = -ENODEV;
-
-out_free:
-	free_page((unsigned long) reply);
-out:
-	return rc;
-}
-
-static void ap_interrupt_handler(struct airq_struct *airq)
-{
-	inc_irq_stat(IRQIO_APB);
-	tasklet_schedule(&ap_tasklet);
-}
-
-/**
  * __ap_scan_bus(): Scan the AP bus.
  * @dev: Pointer to device
  * @data: Pointer to data
@@ -1412,49 +1601,38 @@
 	return to_ap_dev(dev)->qid == (ap_qid_t)(unsigned long) data;
 }
 
-static void ap_device_release(struct device *dev)
-{
-	struct ap_device *ap_dev = to_ap_dev(dev);
-
-	kfree(ap_dev);
-}
-
 static void ap_scan_bus(struct work_struct *unused)
 {
 	struct ap_device *ap_dev;
 	struct device *dev;
 	ap_qid_t qid;
 	int queue_depth = 0, device_type = 0;
-	unsigned int device_functions;
-	int rc, i;
+	unsigned int device_functions = 0;
+	int rc, i, borked;
 
 	ap_query_configuration();
-	if (ap_select_domain() != 0) {
-		return;
-	}
+	if (ap_select_domain() != 0)
+		goto out;
+
 	for (i = 0; i < AP_DEVICES; i++) {
 		qid = AP_MKQID(i, ap_domain_index);
 		dev = bus_find_device(&ap_bus_type, NULL,
 				      (void *)(unsigned long)qid,
 				      __ap_scan_bus);
-		if (ap_test_config_card_id(i))
-			rc = ap_query_queue(qid, &queue_depth, &device_type);
-		else
-			rc = -ENODEV;
+		rc = ap_query_queue(qid, &queue_depth, &device_type,
+				    &device_functions);
 		if (dev) {
 			ap_dev = to_ap_dev(dev);
 			spin_lock_bh(&ap_dev->lock);
-			if (rc == -ENODEV || ap_dev->unregistered) {
-				spin_unlock_bh(&ap_dev->lock);
-				if (ap_dev->unregistered)
-					i--;
-				device_unregister(dev);
-				put_device(dev);
-				continue;
-			}
+			if (rc == -ENODEV)
+				ap_dev->state = AP_STATE_BORKED;
+			borked = ap_dev->state == AP_STATE_BORKED;
 			spin_unlock_bh(&ap_dev->lock);
+			if (borked)	/* Remove broken device */
+				device_unregister(dev);
 			put_device(dev);
-			continue;
+			if (!borked)
+				continue;
 		}
 		if (rc)
 			continue;
@@ -1462,525 +1640,72 @@
 		if (!ap_dev)
 			break;
 		ap_dev->qid = qid;
-		rc = ap_init_queue(ap_dev);
-		if ((rc != 0) && (rc != -EBUSY)) {
-			kfree(ap_dev);
-			continue;
-		}
+		ap_dev->state = AP_STATE_RESET_START;
+		ap_dev->interrupt = AP_INTR_DISABLED;
 		ap_dev->queue_depth = queue_depth;
-		ap_dev->unregistered = 1;
+		ap_dev->raw_hwtype = device_type;
+		ap_dev->device_type = device_type;
+		ap_dev->functions = device_functions;
 		spin_lock_init(&ap_dev->lock);
 		INIT_LIST_HEAD(&ap_dev->pendingq);
 		INIT_LIST_HEAD(&ap_dev->requestq);
 		INIT_LIST_HEAD(&ap_dev->list);
 		setup_timer(&ap_dev->timeout, ap_request_timeout,
 			    (unsigned long) ap_dev);
-		switch (device_type) {
-		case 0:
-			/* device type probing for old cards */
-			if (ap_probe_device_type(ap_dev)) {
-				kfree(ap_dev);
-				continue;
-			}
-			break;
-		default:
-			ap_dev->device_type = device_type;
-		}
-		ap_dev->raw_hwtype = device_type;
-
-		rc = ap_query_functions(qid, &device_functions);
-		if (!rc)
-			ap_dev->functions = device_functions;
-		else
-			ap_dev->functions = 0u;
 
 		ap_dev->device.bus = &ap_bus_type;
 		ap_dev->device.parent = ap_root_device;
-		if (dev_set_name(&ap_dev->device, "card%02x",
-				 AP_QID_DEVICE(ap_dev->qid))) {
+		rc = dev_set_name(&ap_dev->device, "card%02x",
+				  AP_QID_DEVICE(ap_dev->qid));
+		if (rc) {
 			kfree(ap_dev);
 			continue;
 		}
+		/* Add to list of devices */
+		spin_lock_bh(&ap_device_list_lock);
+		list_add(&ap_dev->list, &ap_device_list);
+		spin_unlock_bh(&ap_device_list_lock);
+		/* Start with a device reset */
+		spin_lock_bh(&ap_dev->lock);
+		ap_sm_wait(ap_sm_event(ap_dev, AP_EVENT_POLL));
+		spin_unlock_bh(&ap_dev->lock);
+		/* Register device */
 		ap_dev->device.release = ap_device_release;
 		rc = device_register(&ap_dev->device);
 		if (rc) {
+			spin_lock_bh(&ap_dev->lock);
+			list_del_init(&ap_dev->list);
+			spin_unlock_bh(&ap_dev->lock);
 			put_device(&ap_dev->device);
 			continue;
 		}
 		/* Add device attributes. */
 		rc = sysfs_create_group(&ap_dev->device.kobj,
 					&ap_dev_attr_group);
-		if (!rc) {
-			spin_lock_bh(&ap_dev->lock);
-			ap_dev->unregistered = 0;
-			spin_unlock_bh(&ap_dev->lock);
-		}
-		else
+		if (rc) {
 			device_unregister(&ap_dev->device);
-	}
-}
-
-static void
-ap_config_timeout(unsigned long ptr)
-{
-	queue_work(ap_work_queue, &ap_config_work);
-	ap_config_timer.expires = jiffies + ap_config_time * HZ;
-	add_timer(&ap_config_timer);
-}
-
-/**
- * ap_poll_read(): Receive pending reply messages from an AP device.
- * @ap_dev: pointer to the AP device
- * @flags: pointer to control flags, bit 2^0 is set if another poll is
- *	   required, bit 2^1 is set if the poll timer needs to get armed
- *
- * Returns 0 if the device is still present, -ENODEV if not.
- */
-static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
-{
-	struct ap_queue_status status;
-	struct ap_message *ap_msg;
-
-	if (ap_dev->queue_count <= 0)
-		return 0;
-	status = __ap_recv(ap_dev->qid, &ap_dev->reply->psmid,
-			   ap_dev->reply->message, ap_dev->reply->length);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		ap_dev->interrupt = status.int_enabled;
-		atomic_dec(&ap_poll_requests);
-		ap_decrease_queue_count(ap_dev);
-		list_for_each_entry(ap_msg, &ap_dev->pendingq, list) {
-			if (ap_msg->psmid != ap_dev->reply->psmid)
-				continue;
-			list_del_init(&ap_msg->list);
-			ap_dev->pendingq_count--;
-			ap_msg->receive(ap_dev, ap_msg, ap_dev->reply);
-			break;
-		}
-		if (ap_dev->queue_count > 0)
-			*flags |= 1;
-		break;
-	case AP_RESPONSE_NO_PENDING_REPLY:
-		ap_dev->interrupt = status.int_enabled;
-		if (status.queue_empty) {
-			/* The card shouldn't forget requests but who knows. */
-			atomic_sub(ap_dev->queue_count, &ap_poll_requests);
-			ap_dev->queue_count = 0;
-			list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
-			ap_dev->requestq_count += ap_dev->pendingq_count;
-			ap_dev->pendingq_count = 0;
-		} else
-			*flags |= 2;
-		break;
-	default:
-		return -ENODEV;
-	}
-	return 0;
-}
-
-/**
- * ap_poll_write(): Send messages from the request queue to an AP device.
- * @ap_dev: pointer to the AP device
- * @flags: pointer to control flags, bit 2^0 is set if another poll is
- *	   required, bit 2^1 is set if the poll timer needs to get armed
- *
- * Returns 0 if the device is still present, -ENODEV if not.
- */
-static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
-{
-	struct ap_queue_status status;
-	struct ap_message *ap_msg;
-
-	if (ap_dev->requestq_count <= 0 ||
-	    (ap_dev->queue_count >= ap_dev->queue_depth) ||
-	    (ap_dev->reset == AP_RESET_IN_PROGRESS))
-		return 0;
-	/* Start the next request on the queue. */
-	ap_msg = list_entry(ap_dev->requestq.next, struct ap_message, list);
-	status = __ap_send(ap_dev->qid, ap_msg->psmid,
-			   ap_msg->message, ap_msg->length, ap_msg->special);
-	switch (status.response_code) {
-	case AP_RESPONSE_NORMAL:
-		atomic_inc(&ap_poll_requests);
-		ap_increase_queue_count(ap_dev);
-		list_move_tail(&ap_msg->list, &ap_dev->pendingq);
-		ap_dev->requestq_count--;
-		ap_dev->pendingq_count++;
-		if (ap_dev->queue_count < ap_dev->queue_depth &&
-		    ap_dev->requestq_count > 0)
-			*flags |= 1;
-		*flags |= 2;
-		break;
-	case AP_RESPONSE_RESET_IN_PROGRESS:
-		__ap_schedule_poll_timer();
-	case AP_RESPONSE_Q_FULL:
-		*flags |= 2;
-		break;
-	case AP_RESPONSE_MESSAGE_TOO_BIG:
-	case AP_RESPONSE_REQ_FAC_NOT_INST:
-		return -EINVAL;
-	default:
-		return -ENODEV;
-	}
-	return 0;
-}
-
-/**
- * ap_poll_queue(): Poll AP device for pending replies and send new messages.
- * Check if the queue has a pending reset. In case it's done re-enable
- * interrupts, otherwise reschedule the poll_timer for another attempt.
- * @ap_dev: pointer to the bus device
- * @flags: pointer to control flags, bit 2^0 is set if another poll is
- *	   required, bit 2^1 is set if the poll timer needs to get armed
- *
- * Poll AP device for pending replies and send new messages. If either
- * ap_poll_read or ap_poll_write returns -ENODEV unregister the device.
- * Returns 0.
- */
-static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags)
-{
-	int rc, depth, type;
-	struct ap_queue_status status;
-
-
-	if (ap_dev->reset == AP_RESET_IN_PROGRESS) {
-		status = ap_test_queue(ap_dev->qid, &depth, &type);
-		switch (status.response_code) {
-		case AP_RESPONSE_NORMAL:
-			ap_dev->reset = AP_RESET_IGNORE;
-			if (ap_using_interrupts()) {
-				rc = ap_queue_enable_interruption(
-					ap_dev, ap_airq.lsi_ptr);
-				if (!rc)
-					ap_dev->interrupt = AP_INTR_IN_PROGRESS;
-				else if (rc == -ENODEV) {
-					pr_err("Registering adapter interrupts for "
-					"AP %d failed\n", AP_QID_DEVICE(ap_dev->qid));
-					return rc;
-				}
-			}
-			/* fall through */
-		case AP_RESPONSE_BUSY:
-		case AP_RESPONSE_RESET_IN_PROGRESS:
-			*flags |= AP_POLL_AFTER_TIMEOUT;
-			break;
-		case AP_RESPONSE_Q_NOT_AVAIL:
-		case AP_RESPONSE_DECONFIGURED:
-		case AP_RESPONSE_CHECKSTOPPED:
-			return -ENODEV;
-		default:
-			break;
-		}
-	}
-
-	if ((ap_dev->reset != AP_RESET_IN_PROGRESS) &&
-		(ap_dev->interrupt == AP_INTR_IN_PROGRESS)) {
-		status = ap_test_queue(ap_dev->qid, &depth, &type);
-		if (ap_using_interrupts()) {
-			if (status.int_enabled == 1)
-				ap_dev->interrupt = AP_INTR_ENABLED;
-			else
-				*flags |= AP_POLL_AFTER_TIMEOUT;
-		} else
-			ap_dev->interrupt = AP_INTR_DISABLED;
-	}
-
-	rc = ap_poll_read(ap_dev, flags);
-	if (rc)
-		return rc;
-	return ap_poll_write(ap_dev, flags);
-}
-
-/**
- * __ap_queue_message(): Queue a message to a device.
- * @ap_dev: pointer to the AP device
- * @ap_msg: the message to be queued
- *
- * Queue a message to a device. Returns 0 if successful.
- */
-static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
-{
-	struct ap_queue_status status;
-
-	if (list_empty(&ap_dev->requestq) &&
-	    (ap_dev->queue_count < ap_dev->queue_depth) &&
-	    (ap_dev->reset != AP_RESET_IN_PROGRESS)) {
-		status = __ap_send(ap_dev->qid, ap_msg->psmid,
-				   ap_msg->message, ap_msg->length,
-				   ap_msg->special);
-		switch (status.response_code) {
-		case AP_RESPONSE_NORMAL:
-			list_add_tail(&ap_msg->list, &ap_dev->pendingq);
-			atomic_inc(&ap_poll_requests);
-			ap_dev->pendingq_count++;
-			ap_increase_queue_count(ap_dev);
-			ap_dev->total_request_count++;
-			break;
-		case AP_RESPONSE_Q_FULL:
-		case AP_RESPONSE_RESET_IN_PROGRESS:
-			list_add_tail(&ap_msg->list, &ap_dev->requestq);
-			ap_dev->requestq_count++;
-			ap_dev->total_request_count++;
-			return -EBUSY;
-		case AP_RESPONSE_REQ_FAC_NOT_INST:
-		case AP_RESPONSE_MESSAGE_TOO_BIG:
-			ap_msg->receive(ap_dev, ap_msg, ERR_PTR(-EINVAL));
-			return -EINVAL;
-		default:	/* Device is gone. */
-			ap_msg->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
-			return -ENODEV;
-		}
-	} else {
-		list_add_tail(&ap_msg->list, &ap_dev->requestq);
-		ap_dev->requestq_count++;
-		ap_dev->total_request_count++;
-		return -EBUSY;
-	}
-	ap_schedule_poll_timer();
-	return 0;
-}
-
-void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
-{
-	unsigned long flags;
-	int rc;
-
-	/* For asynchronous message handling a valid receive-callback
-	 * is required. */
-	BUG_ON(!ap_msg->receive);
-
-	spin_lock_bh(&ap_dev->lock);
-	if (!ap_dev->unregistered) {
-		/* Make room on the queue by polling for finished requests. */
-		rc = ap_poll_queue(ap_dev, &flags);
-		if (!rc)
-			rc = __ap_queue_message(ap_dev, ap_msg);
-		if (!rc)
-			wake_up(&ap_poll_wait);
-		if (rc == -ENODEV)
-			ap_dev->unregistered = 1;
-	} else {
-		ap_msg->receive(ap_dev, ap_msg, ERR_PTR(-ENODEV));
-		rc = -ENODEV;
-	}
-	spin_unlock_bh(&ap_dev->lock);
-	if (rc == -ENODEV)
-		device_unregister(&ap_dev->device);
-}
-EXPORT_SYMBOL(ap_queue_message);
-
-/**
- * ap_cancel_message(): Cancel a crypto request.
- * @ap_dev: The AP device that has the message queued
- * @ap_msg: The message that is to be removed
- *
- * Cancel a crypto request. This is done by removing the request
- * from the device pending or request queue. Note that the
- * request stays on the AP queue. When it finishes the message
- * reply will be discarded because the psmid can't be found.
- */
-void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
-{
-	struct ap_message *tmp;
-
-	spin_lock_bh(&ap_dev->lock);
-	if (!list_empty(&ap_msg->list)) {
-		list_for_each_entry(tmp, &ap_dev->pendingq, list)
-			if (tmp->psmid == ap_msg->psmid) {
-				ap_dev->pendingq_count--;
-				goto found;
-			}
-		ap_dev->requestq_count--;
-	found:
-		list_del_init(&ap_msg->list);
-	}
-	spin_unlock_bh(&ap_dev->lock);
-}
-EXPORT_SYMBOL(ap_cancel_message);
-
-/**
- * ap_poll_timeout(): AP receive polling for finished AP requests.
- * @unused: Unused pointer.
- *
- * Schedules the AP tasklet using a high resolution timer.
- */
-static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused)
-{
-	tasklet_schedule(&ap_tasklet);
-	return HRTIMER_NORESTART;
-}
-
-/**
- * ap_reset(): Reset a not responding AP device.
- * @ap_dev: Pointer to the AP device
- *
- * Reset a not responding AP device and move all requests from the
- * pending queue to the request queue.
- */
-static void ap_reset(struct ap_device *ap_dev, unsigned long *flags)
-{
-	int rc;
-
-	atomic_sub(ap_dev->queue_count, &ap_poll_requests);
-	ap_dev->queue_count = 0;
-	list_splice_init(&ap_dev->pendingq, &ap_dev->requestq);
-	ap_dev->requestq_count += ap_dev->pendingq_count;
-	ap_dev->pendingq_count = 0;
-	rc = ap_init_queue(ap_dev);
-	if (rc == -ENODEV)
-		ap_dev->unregistered = 1;
-	else
-		*flags |= AP_POLL_AFTER_TIMEOUT;
-}
-
-static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags)
-{
-	if (!ap_dev->unregistered) {
-		if (ap_poll_queue(ap_dev, flags))
-			ap_dev->unregistered = 1;
-		if (ap_dev->reset == AP_RESET_DO)
-			ap_reset(ap_dev, flags);
-	}
-	return 0;
-}
-
-/**
- * ap_poll_all(): Poll all AP devices.
- * @dummy: Unused variable
- *
- * Poll all AP devices on the bus in a round robin fashion. Continue
- * polling until bit 2^0 of the control flags is not set. If bit 2^1
- * of the control flags has been set arm the poll timer.
- */
-static void ap_poll_all(unsigned long dummy)
-{
-	unsigned long flags;
-	struct ap_device *ap_dev;
-
-	/* Reset the indicator if interrupts are used. Thus new interrupts can
-	 * be received. Doing it in the beginning of the tasklet is therefor
-	 * important that no requests on any AP get lost.
-	 */
-	if (ap_using_interrupts())
-		xchg(ap_airq.lsi_ptr, 0);
-	do {
-		flags = 0;
-		spin_lock(&ap_device_list_lock);
-		list_for_each_entry(ap_dev, &ap_device_list, list) {
-			spin_lock(&ap_dev->lock);
-			__ap_poll_device(ap_dev, &flags);
-			spin_unlock(&ap_dev->lock);
-		}
-		spin_unlock(&ap_device_list_lock);
-	} while (flags & AP_POLL_IMMEDIATELY);
-	if (flags & AP_POLL_AFTER_TIMEOUT)
-		__ap_schedule_poll_timer();
-}
-
-/**
- * ap_poll_thread(): Thread that polls for finished requests.
- * @data: Unused pointer
- *
- * AP bus poll thread. The purpose of this thread is to poll for
- * finished requests in a loop if there is a "free" cpu - that is
- * a cpu that doesn't have anything better to do. The polling stops
- * as soon as there is another task or if all messages have been
- * delivered.
- */
-static int ap_poll_thread(void *data)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	unsigned long flags;
-	int requests;
-	struct ap_device *ap_dev;
-
-	set_user_nice(current, MAX_NICE);
-	while (1) {
-		if (ap_suspend_flag)
-			return 0;
-		if (need_resched()) {
-			schedule();
 			continue;
 		}
-		add_wait_queue(&ap_poll_wait, &wait);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (kthread_should_stop())
-			break;
-		requests = atomic_read(&ap_poll_requests);
-		if (requests <= 0)
-			schedule();
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&ap_poll_wait, &wait);
-
-		flags = 0;
-		spin_lock_bh(&ap_device_list_lock);
-		list_for_each_entry(ap_dev, &ap_device_list, list) {
-			spin_lock(&ap_dev->lock);
-			__ap_poll_device(ap_dev, &flags);
-			spin_unlock(&ap_dev->lock);
-		}
-		spin_unlock_bh(&ap_device_list_lock);
 	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&ap_poll_wait, &wait);
-	return 0;
+out:
+	mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ);
 }
 
-static int ap_poll_thread_start(void)
+static void ap_config_timeout(unsigned long ptr)
 {
-	int rc;
-
-	if (ap_using_interrupts() || ap_suspend_flag)
-		return 0;
-	mutex_lock(&ap_poll_thread_mutex);
-	if (!ap_poll_kthread) {
-		ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll");
-		rc = PTR_RET(ap_poll_kthread);
-		if (rc)
-			ap_poll_kthread = NULL;
-	}
-	else
-		rc = 0;
-	mutex_unlock(&ap_poll_thread_mutex);
-	return rc;
-}
-
-static void ap_poll_thread_stop(void)
-{
-	mutex_lock(&ap_poll_thread_mutex);
-	if (ap_poll_kthread) {
-		kthread_stop(ap_poll_kthread);
-		ap_poll_kthread = NULL;
-	}
-	mutex_unlock(&ap_poll_thread_mutex);
-}
-
-/**
- * ap_request_timeout(): Handling of request timeouts
- * @data: Holds the AP device.
- *
- * Handles request timeouts.
- */
-static void ap_request_timeout(unsigned long data)
-{
-	struct ap_device *ap_dev = (struct ap_device *) data;
-
-	if (ap_dev->reset == AP_RESET_ARMED) {
-		ap_dev->reset = AP_RESET_DO;
-
-		if (ap_using_interrupts())
-			tasklet_schedule(&ap_tasklet);
-	}
+	if (ap_suspend_flag)
+		return;
+	queue_work(system_long_wq, &ap_scan_work);
 }
 
 static void ap_reset_domain(void)
 {
 	int i;
 
-	if ((ap_domain_index != -1) && (ap_test_config_domain(ap_domain_index)))
-		for (i = 0; i < AP_DEVICES; i++)
-			ap_reset_queue(AP_MKQID(i, ap_domain_index));
+	if (ap_domain_index == -1 || !ap_test_config_domain(ap_domain_index))
+		return;
+	for (i = 0; i < AP_DEVICES; i++)
+		ap_reset_queue(AP_MKQID(i, ap_domain_index));
 }
 
 static void ap_reset_all(void)
@@ -2009,11 +1734,24 @@
  */
 int __init ap_module_init(void)
 {
+	int max_domain_id;
 	int rc, i;
 
-	if (ap_domain_index < -1 || ap_domain_index >= AP_DOMAINS) {
-		pr_warning("%d is not a valid cryptographic domain\n",
-			   ap_domain_index);
+	if (ap_instructions_available() != 0) {
+		pr_warn("The hardware system does not support AP instructions\n");
+		return -ENODEV;
+	}
+
+	/* Get AP configuration data if available */
+	ap_init_configuration();
+
+	if (ap_configuration)
+		max_domain_id = ap_max_domain_id ? : (AP_DOMAINS - 1);
+	else
+		max_domain_id = 15;
+	if (ap_domain_index < -1 || ap_domain_index > max_domain_id) {
+		pr_warn("%d is not a valid cryptographic domain\n",
+			ap_domain_index);
 		return -EINVAL;
 	}
 	/* In resume callback we need to know if the user had set the domain.
@@ -2022,11 +1760,6 @@
 	if (ap_domain_index >= 0)
 		user_set_domain = 1;
 
-	if (ap_instructions_available() != 0) {
-		pr_warning("The hardware system does not support "
-			   "AP instructions\n");
-		return -ENODEV;
-	}
 	if (ap_interrupts_available()) {
 		rc = register_adapter_interrupt(&ap_airq);
 		ap_airq_flag = (rc == 0);
@@ -2050,24 +1783,11 @@
 	if (rc)
 		goto out_bus;
 
-	ap_work_queue = create_singlethread_workqueue("kapwork");
-	if (!ap_work_queue) {
-		rc = -ENOMEM;
-		goto out_root;
-	}
-
-	ap_query_configuration();
-	if (ap_select_domain() == 0)
-		ap_scan_bus(NULL);
-
 	/* Setup the AP bus rescan timer. */
-	init_timer(&ap_config_timer);
-	ap_config_timer.function = ap_config_timeout;
-	ap_config_timer.data = 0;
-	ap_config_timer.expires = jiffies + ap_config_time * HZ;
-	add_timer(&ap_config_timer);
+	setup_timer(&ap_config_timer, ap_config_timeout, 0);
 
-	/* Setup the high resultion poll timer.
+	/*
+	 * Setup the high resultion poll timer.
 	 * If we are running under z/VM adjust polling to z/VM polling rate.
 	 */
 	if (MACHINE_IS_VM)
@@ -2083,13 +1803,18 @@
 			goto out_work;
 	}
 
+	rc = register_pm_notifier(&ap_power_notifier);
+	if (rc)
+		goto out_pm;
+
+	queue_work(system_long_wq, &ap_scan_work);
+
 	return 0;
 
+out_pm:
+	ap_poll_thread_stop();
 out_work:
-	del_timer_sync(&ap_config_timer);
 	hrtimer_cancel(&ap_poll_timer);
-	destroy_workqueue(ap_work_queue);
-out_root:
 	root_device_unregister(ap_root_device);
 out_bus:
 	while (i--)
@@ -2099,14 +1824,10 @@
 	unregister_reset_call(&ap_reset_call);
 	if (ap_using_interrupts())
 		unregister_adapter_interrupt(&ap_airq);
+	kfree(ap_configuration);
 	return rc;
 }
 
-static int __ap_match_all(struct device *dev, void *data)
-{
-	return 1;
-}
-
 /**
  * ap_modules_exit(): The module termination code
  *
@@ -2115,24 +1836,19 @@
 void ap_module_exit(void)
 {
 	int i;
-	struct device *dev;
 
 	ap_reset_domain();
 	ap_poll_thread_stop();
 	del_timer_sync(&ap_config_timer);
 	hrtimer_cancel(&ap_poll_timer);
-	destroy_workqueue(ap_work_queue);
 	tasklet_kill(&ap_tasklet);
-	while ((dev = bus_find_device(&ap_bus_type, NULL, NULL,
-		    __ap_match_all)))
-	{
-		device_unregister(dev);
-		put_device(dev);
-	}
+	bus_for_each_dev(&ap_bus_type, NULL, NULL, __ap_devices_unregister);
 	for (i = 0; ap_bus_attrs[i]; i++)
 		bus_remove_file(&ap_bus_type, ap_bus_attrs[i]);
+	unregister_pm_notifier(&ap_power_notifier);
 	root_device_unregister(ap_root_device);
 	bus_unregister(&ap_bus_type);
+	kfree(ap_configuration);
 	unregister_reset_call(&ap_reset_call);
 	if (ap_using_interrupts())
 		unregister_adapter_interrupt(&ap_airq);
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 00468c8..6adcbdf 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -36,9 +36,6 @@
 #define AP_CONFIG_TIME 30	/* Time in seconds between AP bus rescans. */
 #define AP_POLL_TIME 1		/* Time in ticks between receive polls. */
 
-#define AP_POLL_IMMEDIATELY	1 /* continue running poll tasklet */
-#define AP_POLL_AFTER_TIMEOUT	2 /* run poll tasklet again after timout */
-
 extern int ap_domain_index;
 
 /**
@@ -75,21 +72,9 @@
 	unsigned int pad2		: 16;
 } __packed;
 
-#define AP_QUEUE_STATUS_INVALID \
-		{ 1, 1, 1, 0xF, 1, 0xFF, 0xFFFF }
 
-static inline
-int ap_queue_status_invalid_test(struct ap_queue_status *status)
-{
-	struct ap_queue_status invalid = AP_QUEUE_STATUS_INVALID;
-	return !(memcmp(status, &invalid, sizeof(struct ap_queue_status)));
-}
-
-#define AP_MAX_BITS 31
 static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 {
-	if (nr > AP_MAX_BITS)
-		return 0;
 	return (*ptr & (0x80000000u >> nr)) != 0;
 }
 
@@ -132,19 +117,45 @@
 #define AP_FUNC_APXA  6
 
 /*
- * AP reset flag states
- */
-#define AP_RESET_IGNORE	0	/* request timeout will be ignored */
-#define AP_RESET_ARMED	1	/* request timeout timer is active */
-#define AP_RESET_DO	2	/* AP reset required */
-#define AP_RESET_IN_PROGRESS	3	/* AP reset in progress */
-
-/*
  * AP interrupt states
  */
 #define AP_INTR_DISABLED	0	/* AP interrupt disabled */
 #define AP_INTR_ENABLED		1	/* AP interrupt enabled */
-#define AP_INTR_IN_PROGRESS	3	/* AP interrupt in progress */
+
+/*
+ * AP device states
+ */
+enum ap_state {
+	AP_STATE_RESET_START,
+	AP_STATE_RESET_WAIT,
+	AP_STATE_SETIRQ_WAIT,
+	AP_STATE_IDLE,
+	AP_STATE_WORKING,
+	AP_STATE_QUEUE_FULL,
+	AP_STATE_SUSPEND_WAIT,
+	AP_STATE_BORKED,
+	NR_AP_STATES
+};
+
+/*
+ * AP device events
+ */
+enum ap_event {
+	AP_EVENT_POLL,
+	AP_EVENT_TIMEOUT,
+	NR_AP_EVENTS
+};
+
+/*
+ * AP wait behaviour
+ */
+enum ap_wait {
+	AP_WAIT_AGAIN,		/* retry immediately */
+	AP_WAIT_TIMEOUT,	/* wait for timeout */
+	AP_WAIT_INTERRUPT,	/* wait for thin interrupt (if available) */
+	AP_WAIT_NONE,		/* no wait */
+	NR_AP_WAIT
+};
 
 struct ap_device;
 struct ap_message;
@@ -163,20 +174,22 @@
 int ap_driver_register(struct ap_driver *, struct module *, char *);
 void ap_driver_unregister(struct ap_driver *);
 
+typedef enum ap_wait (ap_func_t)(struct ap_device *ap_dev);
+
 struct ap_device {
 	struct device device;
 	struct ap_driver *drv;		/* Pointer to AP device driver. */
 	spinlock_t lock;		/* Per device lock. */
 	struct list_head list;		/* private list of all AP devices. */
 
+	enum ap_state state;		/* State of the AP device. */
+
 	ap_qid_t qid;			/* AP queue id. */
 	int queue_depth;		/* AP queue depth.*/
 	int device_type;		/* AP device type. */
 	int raw_hwtype;			/* AP raw hardware type. */
 	unsigned int functions;		/* AP device function bitfield. */
-	int unregistered;		/* marks AP device as unregistered */
 	struct timer_list timeout;	/* Timer for request timeouts. */
-	int reset;			/* Reset required after req. timeout. */
 
 	int interrupt;			/* indicate if interrupts are enabled */
 	int queue_count;		/* # messages currently on AP queue. */
@@ -199,6 +212,7 @@
 	unsigned long long psmid;	/* Message id. */
 	void *message;			/* Pointer to message buffer. */
 	size_t length;			/* Message length. */
+	int rc;				/* Return code for this message */
 
 	void *private;			/* ap driver private pointer. */
 	unsigned int special:1;		/* Used for special commands. */
@@ -231,6 +245,7 @@
 {
 	ap_msg->psmid = 0;
 	ap_msg->length = 0;
+	ap_msg->rc = 0;
 	ap_msg->special = 0;
 	ap_msg->receive = NULL;
 }
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 4eb4554..a9603eb 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -472,8 +472,7 @@
 	unsigned long long z1, z2, z3;
 	int rc, copied;
 
-	if (crt->outputdatalength < crt->inputdatalength ||
-	    (crt->inputdatalength & 1))
+	if (crt->outputdatalength < crt->inputdatalength)
 		return -EINVAL;
 	/*
 	 * As long as outputdatalength is big enough, we can set the
diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h
index 1f42f10..ca0cdbe 100644
--- a/drivers/s390/crypto/zcrypt_cca_key.h
+++ b/drivers/s390/crypto/zcrypt_cca_key.h
@@ -291,7 +291,7 @@
 
 	memset(key, 0, sizeof(*key));
 
-	short_len = crt->inputdatalength / 2;
+	short_len = (crt->inputdatalength + 1) / 2;
 	long_len = short_len + 8;
 	pad_len = -(3*long_len + 2*short_len) & 7;
 	key_len = 3*long_len + 2*short_len + pad_len + crt->inputdatalength;
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 334e282..71ceee9 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -248,7 +248,7 @@
 	unsigned char *p, *q, *dp, *dq, *u, *inp;
 
 	mod_len = crt->inputdatalength;
-	short_len = mod_len / 2;
+	short_len = (mod_len + 1) / 2;
 
 	/*
 	 * CEX2A and CEX3A w/o FW update can handle requests up to
@@ -395,10 +395,8 @@
 	int length;
 
 	/* Copy the reply message to the request message buffer. */
-	if (IS_ERR(reply)) {
-		memcpy(msg->message, &error_reply, sizeof(error_reply));
-		goto out;
-	}
+	if (!reply)
+		goto out;	/* ap_msg->rc indicates the error */
 	t80h = reply->message;
 	if (t80h->type == TYPE80_RSP_CODE) {
 		if (ap_dev->device_type == AP_DEVICE_TYPE_CEX2A)
@@ -449,10 +447,12 @@
 	init_completion(&work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible(&work);
-	if (rc == 0)
-		rc = convert_response(zdev, &ap_msg, mex->outputdata,
-				      mex->outputdatalength);
-	else
+	if (rc == 0) {
+		rc = ap_msg.rc;
+		if (rc == 0)
+			rc = convert_response(zdev, &ap_msg, mex->outputdata,
+					      mex->outputdatalength);
+	} else
 		/* Signal pending. */
 		ap_cancel_message(zdev->ap_dev, &ap_msg);
 out_free:
@@ -493,10 +493,12 @@
 	init_completion(&work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible(&work);
-	if (rc == 0)
-		rc = convert_response(zdev, &ap_msg, crt->outputdata,
-				      crt->outputdatalength);
-	else
+	if (rc == 0) {
+		rc = ap_msg.rc;
+		if (rc == 0)
+			rc = convert_response(zdev, &ap_msg, crt->outputdata,
+					      crt->outputdatalength);
+	} else
 		/* Signal pending. */
 		ap_cancel_message(zdev->ap_dev, &ap_msg);
 out_free:
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 46b324c..7476221 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -829,10 +829,8 @@
 	int length;
 
 	/* Copy the reply message to the request message buffer. */
-	if (IS_ERR(reply)) {
-		memcpy(msg->message, &error_reply, sizeof(error_reply));
-		goto out;
-	}
+	if (!reply)
+		goto out;	/* ap_msg->rc indicates the error */
 	t86r = reply->message;
 	if (t86r->hdr.type == TYPE86_RSP_CODE &&
 		 t86r->cprbx.cprb_ver_id == 0x02) {
@@ -880,10 +878,8 @@
 	int length;
 
 	/* Copy the reply message to the request message buffer. */
-	if (IS_ERR(reply)) {
-		memcpy(msg->message, &error_reply, sizeof(error_reply));
-		goto out;
-	}
+	if (!reply)
+		goto out;	/* ap_msg->rc indicates the error */
 	t86r = reply->message;
 	if (t86r->hdr.type == TYPE86_RSP_CODE &&
 	    t86r->cprbx.cprb_ver_id == 0x04) {
@@ -935,10 +931,13 @@
 	init_completion(&resp_type.work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible(&resp_type.work);
-	if (rc == 0)
-		rc = convert_response_ica(zdev, &ap_msg, mex->outputdata,
-					  mex->outputdatalength);
-	else
+	if (rc == 0) {
+		rc = ap_msg.rc;
+		if (rc == 0)
+			rc = convert_response_ica(zdev, &ap_msg,
+						  mex->outputdata,
+						  mex->outputdatalength);
+	} else
 		/* Signal pending. */
 		ap_cancel_message(zdev->ap_dev, &ap_msg);
 out_free:
@@ -976,10 +975,13 @@
 	init_completion(&resp_type.work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible(&resp_type.work);
-	if (rc == 0)
-		rc = convert_response_ica(zdev, &ap_msg, crt->outputdata,
-					  crt->outputdatalength);
-	else
+	if (rc == 0) {
+		rc = ap_msg.rc;
+		if (rc == 0)
+			rc = convert_response_ica(zdev, &ap_msg,
+						  crt->outputdata,
+						  crt->outputdatalength);
+	} else
 		/* Signal pending. */
 		ap_cancel_message(zdev->ap_dev, &ap_msg);
 out_free:
@@ -1017,9 +1019,11 @@
 	init_completion(&resp_type.work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible(&resp_type.work);
-	if (rc == 0)
-		rc = convert_response_xcrb(zdev, &ap_msg, xcRB);
-	else
+	if (rc == 0) {
+		rc = ap_msg.rc;
+		if (rc == 0)
+			rc = convert_response_xcrb(zdev, &ap_msg, xcRB);
+	} else
 		/* Signal pending. */
 		ap_cancel_message(zdev->ap_dev, &ap_msg);
 out_free:
@@ -1057,9 +1061,12 @@
 	init_completion(&resp_type.work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible(&resp_type.work);
-	if (rc == 0)
-		rc = convert_response_ep11_xcrb(zdev, &ap_msg, xcrb);
-	else /* Signal pending. */
+	if (rc == 0) {
+		rc = ap_msg.rc;
+		if (rc == 0)
+			rc = convert_response_ep11_xcrb(zdev, &ap_msg, xcrb);
+	} else
+		/* Signal pending. */
 		ap_cancel_message(zdev->ap_dev, &ap_msg);
 
 out_free:
@@ -1096,9 +1103,11 @@
 	init_completion(&resp_type.work);
 	ap_queue_message(zdev->ap_dev, &ap_msg);
 	rc = wait_for_completion_interruptible(&resp_type.work);
-	if (rc == 0)
-		rc = convert_response_rng(zdev, &ap_msg, buffer);
-	else
+	if (rc == 0) {
+		rc = ap_msg.rc;
+		if (rc == 0)
+			rc = convert_response_rng(zdev, &ap_msg, buffer);
+	} else
 		/* Signal pending. */
 		ap_cancel_message(zdev->ap_dev, &ap_msg);
 	kfree(ap_msg.message);
diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c
deleted file mode 100644
index 7a743f4..0000000
--- a/drivers/s390/crypto/zcrypt_pcica.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- *  zcrypt 2.1.0
- *
- *  Copyright IBM Corp. 2001, 2006
- *  Author(s): Robert Burroughs
- *	       Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define KMSG_COMPONENT "zcrypt"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/atomic.h>
-#include <asm/uaccess.h>
-
-#include "ap_bus.h"
-#include "zcrypt_api.h"
-#include "zcrypt_error.h"
-#include "zcrypt_pcica.h"
-
-#define PCICA_MIN_MOD_SIZE	  1	/*    8 bits	*/
-#define PCICA_MAX_MOD_SIZE	256	/* 2048 bits	*/
-
-#define PCICA_SPEED_RATING	2800
-
-#define PCICA_MAX_MESSAGE_SIZE	0x3a0	/* sizeof(struct type4_lcr)	     */
-#define PCICA_MAX_RESPONSE_SIZE 0x110	/* max outputdatalength + type80_hdr */
-
-#define PCICA_CLEANUP_TIME	(15*HZ)
-
-static struct ap_device_id zcrypt_pcica_ids[] = {
-	{ AP_DEVICE(AP_DEVICE_TYPE_PCICA) },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_pcica_ids);
-MODULE_AUTHOR("IBM Corporation");
-MODULE_DESCRIPTION("PCICA Cryptographic Coprocessor device driver, "
-		   "Copyright IBM Corp. 2001, 2006");
-MODULE_LICENSE("GPL");
-
-static int zcrypt_pcica_probe(struct ap_device *ap_dev);
-static void zcrypt_pcica_remove(struct ap_device *ap_dev);
-static void zcrypt_pcica_receive(struct ap_device *, struct ap_message *,
-				 struct ap_message *);
-
-static struct ap_driver zcrypt_pcica_driver = {
-	.probe = zcrypt_pcica_probe,
-	.remove = zcrypt_pcica_remove,
-	.ids = zcrypt_pcica_ids,
-	.request_timeout = PCICA_CLEANUP_TIME,
-};
-
-/**
- * Convert a ICAMEX message to a type4 MEX message.
- *
- * @zdev: crypto device pointer
- * @zreq: crypto request pointer
- * @mex: pointer to user input data
- *
- * Returns 0 on success or -EFAULT.
- */
-static int ICAMEX_msg_to_type4MEX_msg(struct zcrypt_device *zdev,
-				      struct ap_message *ap_msg,
-				      struct ica_rsa_modexpo *mex)
-{
-	unsigned char *modulus, *exponent, *message;
-	int mod_len;
-
-	mod_len = mex->inputdatalength;
-
-	if (mod_len <= 128) {
-		struct type4_sme *sme = ap_msg->message;
-		memset(sme, 0, sizeof(*sme));
-		ap_msg->length = sizeof(*sme);
-		sme->header.msg_fmt = TYPE4_SME_FMT;
-		sme->header.msg_len = sizeof(*sme);
-		sme->header.msg_type_code = TYPE4_TYPE_CODE;
-		sme->header.request_code = TYPE4_REQU_CODE;
-		modulus = sme->modulus + sizeof(sme->modulus) - mod_len;
-		exponent = sme->exponent + sizeof(sme->exponent) - mod_len;
-		message = sme->message + sizeof(sme->message) - mod_len;
-	} else {
-		struct type4_lme *lme = ap_msg->message;
-		memset(lme, 0, sizeof(*lme));
-		ap_msg->length = sizeof(*lme);
-		lme->header.msg_fmt = TYPE4_LME_FMT;
-		lme->header.msg_len = sizeof(*lme);
-		lme->header.msg_type_code = TYPE4_TYPE_CODE;
-		lme->header.request_code = TYPE4_REQU_CODE;
-		modulus = lme->modulus + sizeof(lme->modulus) - mod_len;
-		exponent = lme->exponent + sizeof(lme->exponent) - mod_len;
-		message = lme->message + sizeof(lme->message) - mod_len;
-	}
-
-	if (copy_from_user(modulus, mex->n_modulus, mod_len) ||
-	    copy_from_user(exponent, mex->b_key, mod_len) ||
-	    copy_from_user(message, mex->inputdata, mod_len))
-		return -EFAULT;
-	return 0;
-}
-
-/**
- * Convert a ICACRT message to a type4 CRT message.
- *
- * @zdev: crypto device pointer
- * @zreq: crypto request pointer
- * @crt: pointer to user input data
- *
- * Returns 0 on success or -EFAULT.
- */
-static int ICACRT_msg_to_type4CRT_msg(struct zcrypt_device *zdev,
-				      struct ap_message *ap_msg,
-				      struct ica_rsa_modexpo_crt *crt)
-{
-	unsigned char *p, *q, *dp, *dq, *u, *inp;
-	int mod_len, short_len, long_len;
-
-	mod_len = crt->inputdatalength;
-	short_len = mod_len / 2;
-	long_len = mod_len / 2 + 8;
-
-	if (mod_len <= 128) {
-		struct type4_scr *scr = ap_msg->message;
-		memset(scr, 0, sizeof(*scr));
-		ap_msg->length = sizeof(*scr);
-		scr->header.msg_type_code = TYPE4_TYPE_CODE;
-		scr->header.request_code = TYPE4_REQU_CODE;
-		scr->header.msg_fmt = TYPE4_SCR_FMT;
-		scr->header.msg_len = sizeof(*scr);
-		p = scr->p + sizeof(scr->p) - long_len;
-		q = scr->q + sizeof(scr->q) - short_len;
-		dp = scr->dp + sizeof(scr->dp) - long_len;
-		dq = scr->dq + sizeof(scr->dq) - short_len;
-		u = scr->u + sizeof(scr->u) - long_len;
-		inp = scr->message + sizeof(scr->message) - mod_len;
-	} else {
-		struct type4_lcr *lcr = ap_msg->message;
-		memset(lcr, 0, sizeof(*lcr));
-		ap_msg->length = sizeof(*lcr);
-		lcr->header.msg_type_code = TYPE4_TYPE_CODE;
-		lcr->header.request_code = TYPE4_REQU_CODE;
-		lcr->header.msg_fmt = TYPE4_LCR_FMT;
-		lcr->header.msg_len = sizeof(*lcr);
-		p = lcr->p + sizeof(lcr->p) - long_len;
-		q = lcr->q + sizeof(lcr->q) - short_len;
-		dp = lcr->dp + sizeof(lcr->dp) - long_len;
-		dq = lcr->dq + sizeof(lcr->dq) - short_len;
-		u = lcr->u + sizeof(lcr->u) - long_len;
-		inp = lcr->message + sizeof(lcr->message) - mod_len;
-	}
-
-	if (copy_from_user(p, crt->np_prime, long_len) ||
-	    copy_from_user(q, crt->nq_prime, short_len) ||
-	    copy_from_user(dp, crt->bp_key, long_len) ||
-	    copy_from_user(dq, crt->bq_key, short_len) ||
-	    copy_from_user(u, crt->u_mult_inv, long_len) ||
-	    copy_from_user(inp, crt->inputdata, mod_len))
-		return -EFAULT;
-	return 0;
-}
-
-/**
- * Copy results from a type 84 reply message back to user space.
- *
- * @zdev: crypto device pointer
- * @reply: reply AP message.
- * @data: pointer to user output data
- * @length: size of user output data
- *
- * Returns 0 on success or -EFAULT.
- */
-static int convert_type84(struct zcrypt_device *zdev,
-			  struct ap_message *reply,
-			  char __user *outputdata,
-			  unsigned int outputdatalength)
-{
-	struct type84_hdr *t84h = reply->message;
-	char *data;
-
-	if (t84h->len < sizeof(*t84h) + outputdatalength) {
-		/* The result is too short, the PCICA card may not do that.. */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid, zdev->online, t84h->code);
-		return -EAGAIN;	/* repeat the request on a different device. */
-	}
-	BUG_ON(t84h->len > PCICA_MAX_RESPONSE_SIZE);
-	data = reply->message + t84h->len - outputdatalength;
-	if (copy_to_user(outputdata, data, outputdatalength))
-		return -EFAULT;
-	return 0;
-}
-
-static int convert_response(struct zcrypt_device *zdev,
-			    struct ap_message *reply,
-			    char __user *outputdata,
-			    unsigned int outputdatalength)
-{
-	/* Response type byte is the second byte in the response. */
-	switch (((unsigned char *) reply->message)[1]) {
-	case TYPE82_RSP_CODE:
-	case TYPE88_RSP_CODE:
-		return convert_error(zdev, reply);
-	case TYPE84_RSP_CODE:
-		return convert_type84(zdev, reply,
-				      outputdata, outputdatalength);
-	default: /* Unknown response type, this should NEVER EVER happen */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
-		return -EAGAIN;	/* repeat the request on a different device. */
-	}
-}
-
-/**
- * This function is called from the AP bus code after a crypto request
- * "msg" has finished with the reply message "reply".
- * It is called from tasklet context.
- * @ap_dev: pointer to the AP device
- * @msg: pointer to the AP message
- * @reply: pointer to the AP reply message
- */
-static void zcrypt_pcica_receive(struct ap_device *ap_dev,
-				 struct ap_message *msg,
-				 struct ap_message *reply)
-{
-	static struct error_hdr error_reply = {
-		.type = TYPE82_RSP_CODE,
-		.reply_code = REP82_ERROR_MACHINE_FAILURE,
-	};
-	struct type84_hdr *t84h;
-	int length;
-
-	/* Copy the reply message to the request message buffer. */
-	if (IS_ERR(reply)) {
-		memcpy(msg->message, &error_reply, sizeof(error_reply));
-		goto out;
-	}
-	t84h = reply->message;
-	if (t84h->code == TYPE84_RSP_CODE) {
-		length = min(PCICA_MAX_RESPONSE_SIZE, (int) t84h->len);
-		memcpy(msg->message, reply->message, length);
-	} else
-		memcpy(msg->message, reply->message, sizeof error_reply);
-out:
-	complete((struct completion *) msg->private);
-}
-
-static atomic_t zcrypt_step = ATOMIC_INIT(0);
-
-/**
- * The request distributor calls this function if it picked the PCICA
- * device to handle a modexpo request.
- * @zdev: pointer to zcrypt_device structure that identifies the
- *	  PCICA device to the request distributor
- * @mex: pointer to the modexpo request buffer
- */
-static long zcrypt_pcica_modexpo(struct zcrypt_device *zdev,
-				 struct ica_rsa_modexpo *mex)
-{
-	struct ap_message ap_msg;
-	struct completion work;
-	int rc;
-
-	ap_init_message(&ap_msg);
-	ap_msg.message = kmalloc(PCICA_MAX_MESSAGE_SIZE, GFP_KERNEL);
-	if (!ap_msg.message)
-		return -ENOMEM;
-	ap_msg.receive = zcrypt_pcica_receive;
-	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
-				atomic_inc_return(&zcrypt_step);
-	ap_msg.private = &work;
-	rc = ICAMEX_msg_to_type4MEX_msg(zdev, &ap_msg, mex);
-	if (rc)
-		goto out_free;
-	init_completion(&work);
-	ap_queue_message(zdev->ap_dev, &ap_msg);
-	rc = wait_for_completion_interruptible(&work);
-	if (rc == 0)
-		rc = convert_response(zdev, &ap_msg, mex->outputdata,
-				      mex->outputdatalength);
-	else
-		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, &ap_msg);
-out_free:
-	kfree(ap_msg.message);
-	return rc;
-}
-
-/**
- * The request distributor calls this function if it picked the PCICA
- * device to handle a modexpo_crt request.
- * @zdev: pointer to zcrypt_device structure that identifies the
- *	  PCICA device to the request distributor
- * @crt: pointer to the modexpoc_crt request buffer
- */
-static long zcrypt_pcica_modexpo_crt(struct zcrypt_device *zdev,
-				     struct ica_rsa_modexpo_crt *crt)
-{
-	struct ap_message ap_msg;
-	struct completion work;
-	int rc;
-
-	ap_init_message(&ap_msg);
-	ap_msg.message = kmalloc(PCICA_MAX_MESSAGE_SIZE, GFP_KERNEL);
-	if (!ap_msg.message)
-		return -ENOMEM;
-	ap_msg.receive = zcrypt_pcica_receive;
-	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
-				atomic_inc_return(&zcrypt_step);
-	ap_msg.private = &work;
-	rc = ICACRT_msg_to_type4CRT_msg(zdev, &ap_msg, crt);
-	if (rc)
-		goto out_free;
-	init_completion(&work);
-	ap_queue_message(zdev->ap_dev, &ap_msg);
-	rc = wait_for_completion_interruptible(&work);
-	if (rc == 0)
-		rc = convert_response(zdev, &ap_msg, crt->outputdata,
-				      crt->outputdatalength);
-	else
-		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, &ap_msg);
-out_free:
-	kfree(ap_msg.message);
-	return rc;
-}
-
-/**
- * The crypto operations for a PCICA card.
- */
-static struct zcrypt_ops zcrypt_pcica_ops = {
-	.rsa_modexpo = zcrypt_pcica_modexpo,
-	.rsa_modexpo_crt = zcrypt_pcica_modexpo_crt,
-};
-
-/**
- * Probe function for PCICA cards. It always accepts the AP device
- * since the bus_match already checked the hardware type.
- * @ap_dev: pointer to the AP device.
- */
-static int zcrypt_pcica_probe(struct ap_device *ap_dev)
-{
-	struct zcrypt_device *zdev;
-	int rc;
-
-	zdev = zcrypt_device_alloc(PCICA_MAX_RESPONSE_SIZE);
-	if (!zdev)
-		return -ENOMEM;
-	zdev->ap_dev = ap_dev;
-	zdev->ops = &zcrypt_pcica_ops;
-	zdev->online = 1;
-	zdev->user_space_type = ZCRYPT_PCICA;
-	zdev->type_string = "PCICA";
-	zdev->min_mod_size = PCICA_MIN_MOD_SIZE;
-	zdev->max_mod_size = PCICA_MAX_MOD_SIZE;
-	zdev->speed_rating = PCICA_SPEED_RATING;
-	zdev->max_exp_bit_length = PCICA_MAX_MOD_SIZE;
-	ap_dev->reply = &zdev->reply;
-	ap_dev->private = zdev;
-	rc = zcrypt_device_register(zdev);
-	if (rc)
-		goto out_free;
-	return 0;
-
-out_free:
-	ap_dev->private = NULL;
-	zcrypt_device_free(zdev);
-	return rc;
-}
-
-/**
- * This is called to remove the extended PCICA driver information
- * if an AP device is removed.
- */
-static void zcrypt_pcica_remove(struct ap_device *ap_dev)
-{
-	struct zcrypt_device *zdev = ap_dev->private;
-
-	zcrypt_device_unregister(zdev);
-}
-
-int __init zcrypt_pcica_init(void)
-{
-	return ap_driver_register(&zcrypt_pcica_driver, THIS_MODULE, "pcica");
-}
-
-void zcrypt_pcica_exit(void)
-{
-	ap_driver_unregister(&zcrypt_pcica_driver);
-}
-
-module_init(zcrypt_pcica_init);
-module_exit(zcrypt_pcica_exit);
diff --git a/drivers/s390/crypto/zcrypt_pcica.h b/drivers/s390/crypto/zcrypt_pcica.h
deleted file mode 100644
index 9a59155..0000000
--- a/drivers/s390/crypto/zcrypt_pcica.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- *  zcrypt 2.1.0
- *
- *  Copyright IBM Corp. 2001, 2006
- *  Author(s): Robert Burroughs
- *	       Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _ZCRYPT_PCICA_H_
-#define _ZCRYPT_PCICA_H_
-
-/**
- * The type 4 message family is associated with a PCICA card.
- *
- * The four members of the family are described below.
- *
- * Note that all unsigned char arrays are right-justified and left-padded
- * with zeroes.
- *
- * Note that all reserved fields must be zeroes.
- */
-struct type4_hdr {
-	unsigned char  reserved1;
-	unsigned char  msg_type_code;	/* 0x04 */
-	unsigned short msg_len;
-	unsigned char  request_code;	/* 0x40 */
-	unsigned char  msg_fmt;
-	unsigned short reserved2;
-} __attribute__((packed));
-
-#define TYPE4_TYPE_CODE 0x04
-#define TYPE4_REQU_CODE 0x40
-
-#define TYPE4_SME_FMT 0x00
-#define TYPE4_LME_FMT 0x10
-#define TYPE4_SCR_FMT 0x40
-#define TYPE4_LCR_FMT 0x50
-
-/* Mod-Exp, with a small modulus */
-struct type4_sme {
-	struct type4_hdr header;
-	unsigned char	 message[128];
-	unsigned char	 exponent[128];
-	unsigned char	 modulus[128];
-} __attribute__((packed));
-
-/* Mod-Exp, with a large modulus */
-struct type4_lme {
-	struct type4_hdr header;
-	unsigned char	 message[256];
-	unsigned char	 exponent[256];
-	unsigned char	 modulus[256];
-} __attribute__((packed));
-
-/* CRT, with a small modulus */
-struct type4_scr {
-	struct type4_hdr header;
-	unsigned char	 message[128];
-	unsigned char	 dp[72];
-	unsigned char	 dq[64];
-	unsigned char	 p[72];
-	unsigned char	 q[64];
-	unsigned char	 u[72];
-} __attribute__((packed));
-
-/* CRT, with a large modulus */
-struct type4_lcr {
-	struct type4_hdr header;
-	unsigned char	 message[256];
-	unsigned char	 dp[136];
-	unsigned char	 dq[128];
-	unsigned char	 p[136];
-	unsigned char	 q[128];
-	unsigned char	 u[136];
-} __attribute__((packed));
-
-/**
- * The type 84 response family is associated with a PCICA card.
- *
- * Note that all unsigned char arrays are right-justified and left-padded
- * with zeroes.
- *
- * Note that all reserved fields must be zeroes.
- */
-
-struct type84_hdr {
-	unsigned char  reserved1;
-	unsigned char  code;
-	unsigned short len;
-	unsigned char  reserved2[4];
-} __attribute__((packed));
-
-#define TYPE84_RSP_CODE 0x84
-
-int zcrypt_pcica_init(void);
-void zcrypt_pcica_exit(void);
-
-#endif /* _ZCRYPT_PCICA_H_ */
diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c
deleted file mode 100644
index 9f18876..0000000
--- a/drivers/s390/crypto/zcrypt_pcicc.c
+++ /dev/null
@@ -1,627 +0,0 @@
-/*
- *  zcrypt 2.1.0
- *
- *  Copyright IBM Corp. 2001, 2006
- *  Author(s): Robert Burroughs
- *	       Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *				  Ralph Wuerthner <rwuerthn@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define KMSG_COMPONENT "zcrypt"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/gfp.h>
-#include <linux/err.h>
-#include <linux/atomic.h>
-#include <asm/uaccess.h>
-
-#include "ap_bus.h"
-#include "zcrypt_api.h"
-#include "zcrypt_error.h"
-#include "zcrypt_pcicc.h"
-#include "zcrypt_cca_key.h"
-
-#define PCICC_MIN_MOD_SIZE	 64	/*  512 bits */
-#define PCICC_MAX_MOD_SIZE_OLD	128	/* 1024 bits */
-#define PCICC_MAX_MOD_SIZE	256	/* 2048 bits */
-
-/*
- * PCICC cards need a speed rating of 0. This keeps them at the end of
- * the zcrypt device list (see zcrypt_api.c). PCICC cards are only
- * used if no other cards are present because they are slow and can only
- * cope with PKCS12 padded requests. The logic is queer. PKCS11 padded
- * requests are rejected. The modexpo function encrypts PKCS12 padded data
- * and decrypts any non-PKCS12 padded data (except PKCS11) in the assumption
- * that it's encrypted PKCS12 data. The modexpo_crt function always decrypts
- * the data in the assumption that its PKCS12 encrypted data.
- */
-#define PCICC_SPEED_RATING	0
-
-#define PCICC_MAX_MESSAGE_SIZE 0x710	/* max size type6 v1 crt message */
-#define PCICC_MAX_RESPONSE_SIZE 0x710	/* max size type86 v1 reply	 */
-
-#define PCICC_CLEANUP_TIME	(15*HZ)
-
-static struct ap_device_id zcrypt_pcicc_ids[] = {
-	{ AP_DEVICE(AP_DEVICE_TYPE_PCICC) },
-	{ /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_pcicc_ids);
-MODULE_AUTHOR("IBM Corporation");
-MODULE_DESCRIPTION("PCICC Cryptographic Coprocessor device driver, "
-		   "Copyright IBM Corp. 2001, 2006");
-MODULE_LICENSE("GPL");
-
-static int zcrypt_pcicc_probe(struct ap_device *ap_dev);
-static void zcrypt_pcicc_remove(struct ap_device *ap_dev);
-static void zcrypt_pcicc_receive(struct ap_device *, struct ap_message *,
-				 struct ap_message *);
-
-static struct ap_driver zcrypt_pcicc_driver = {
-	.probe = zcrypt_pcicc_probe,
-	.remove = zcrypt_pcicc_remove,
-	.ids = zcrypt_pcicc_ids,
-	.request_timeout = PCICC_CLEANUP_TIME,
-};
-
-/**
- * The following is used to initialize the CPRB passed to the PCICC card
- * in a type6 message. The 3 fields that must be filled in at execution
- * time are  req_parml, rpl_parml and usage_domain. Note that all three
- * fields are *little*-endian. Actually, everything about this interface
- * is ascii/little-endian, since the device has 'Intel inside'.
- *
- * The CPRB is followed immediately by the parm block.
- * The parm block contains:
- * - function code ('PD' 0x5044 or 'PK' 0x504B)
- * - rule block (0x0A00 'PKCS-1.2' or 0x0A00 'ZERO-PAD')
- * - VUD block
- */
-static struct CPRB static_cprb = {
-	.cprb_len	= cpu_to_le16(0x0070),
-	.cprb_ver_id	=  0x41,
-	.func_id	= {0x54,0x32},
-	.checkpoint_flag=  0x01,
-	.svr_namel	= cpu_to_le16(0x0008),
-	.svr_name	= {'I','C','S','F',' ',' ',' ',' '}
-};
-
-/**
- * Check the message for PKCS11 padding.
- */
-static inline int is_PKCS11_padded(unsigned char *buffer, int length)
-{
-	int i;
-	if ((buffer[0] != 0x00) || (buffer[1] != 0x01))
-		return 0;
-	for (i = 2; i < length; i++)
-		if (buffer[i] != 0xFF)
-			break;
-	if (i < 10 || i == length)
-		return 0;
-	if (buffer[i] != 0x00)
-		return 0;
-	return 1;
-}
-
-/**
- * Check the message for PKCS12 padding.
- */
-static inline int is_PKCS12_padded(unsigned char *buffer, int length)
-{
-	int i;
-	if ((buffer[0] != 0x00) || (buffer[1] != 0x02))
-		return 0;
-	for (i = 2; i < length; i++)
-		if (buffer[i] == 0x00)
-			break;
-	if ((i < 10) || (i == length))
-		return 0;
-	if (buffer[i] != 0x00)
-		return 0;
-	return 1;
-}
-
-/**
- * Convert a ICAMEX message to a type6 MEX message.
- *
- * @zdev: crypto device pointer
- * @zreq: crypto request pointer
- * @mex: pointer to user input data
- *
- * Returns 0 on success or -EFAULT.
- */
-static int ICAMEX_msg_to_type6MEX_msg(struct zcrypt_device *zdev,
-				      struct ap_message *ap_msg,
-				      struct ica_rsa_modexpo *mex)
-{
-	static struct type6_hdr static_type6_hdr = {
-		.type		=  0x06,
-		.offset1	=  0x00000058,
-		.agent_id	= {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50,
-				   0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01},
-		.function_code	= {'P','K'},
-	};
-	static struct function_and_rules_block static_pke_function_and_rules ={
-		.function_code	= {'P','K'},
-		.ulen		= cpu_to_le16(10),
-		.only_rule	= {'P','K','C','S','-','1','.','2'}
-	};
-	struct {
-		struct type6_hdr hdr;
-		struct CPRB cprb;
-		struct function_and_rules_block fr;
-		unsigned short length;
-		char text[0];
-	} __attribute__((packed)) *msg = ap_msg->message;
-	int vud_len, pad_len, size;
-
-	/* VUD.ciphertext */
-	if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength))
-		return -EFAULT;
-
-	if (is_PKCS11_padded(msg->text, mex->inputdatalength))
-		return -EINVAL;
-
-	/* static message header and f&r */
-	msg->hdr = static_type6_hdr;
-	msg->fr = static_pke_function_and_rules;
-
-	if (is_PKCS12_padded(msg->text, mex->inputdatalength)) {
-		/* strip the padding and adjust the data length */
-		pad_len = strnlen(msg->text + 2, mex->inputdatalength - 2) + 3;
-		if (pad_len <= 9 || pad_len >= mex->inputdatalength)
-			return -ENODEV;
-		vud_len = mex->inputdatalength - pad_len;
-		memmove(msg->text, msg->text + pad_len, vud_len);
-		msg->length = cpu_to_le16(vud_len + 2);
-
-		/* Set up key after the variable length text. */
-		size = zcrypt_type6_mex_key_en(mex, msg->text + vud_len, 0);
-		if (size < 0)
-			return size;
-		size += sizeof(*msg) + vud_len;	/* total size of msg */
-	} else {
-		vud_len = mex->inputdatalength;
-		msg->length = cpu_to_le16(2 + vud_len);
-
-		msg->hdr.function_code[1] = 'D';
-		msg->fr.function_code[1] = 'D';
-
-		/* Set up key after the variable length text. */
-		size = zcrypt_type6_mex_key_de(mex, msg->text + vud_len, 0);
-		if (size < 0)
-			return size;
-		size += sizeof(*msg) + vud_len;	/* total size of msg */
-	}
-
-	/* message header, cprb and f&r */
-	msg->hdr.ToCardLen1 = (size - sizeof(msg->hdr) + 3) & -4;
-	msg->hdr.FromCardLen1 = PCICC_MAX_RESPONSE_SIZE - sizeof(msg->hdr);
-
-	msg->cprb = static_cprb;
-	msg->cprb.usage_domain[0]= AP_QID_QUEUE(zdev->ap_dev->qid);
-	msg->cprb.req_parml = cpu_to_le16(size - sizeof(msg->hdr) -
-					   sizeof(msg->cprb));
-	msg->cprb.rpl_parml = cpu_to_le16(msg->hdr.FromCardLen1);
-
-	ap_msg->length = (size + 3) & -4;
-	return 0;
-}
-
-/**
- * Convert a ICACRT message to a type6 CRT message.
- *
- * @zdev: crypto device pointer
- * @zreq: crypto request pointer
- * @crt: pointer to user input data
- *
- * Returns 0 on success or -EFAULT.
- */
-static int ICACRT_msg_to_type6CRT_msg(struct zcrypt_device *zdev,
-				      struct ap_message *ap_msg,
-				      struct ica_rsa_modexpo_crt *crt)
-{
-	static struct type6_hdr static_type6_hdr = {
-		.type		=  0x06,
-		.offset1	=  0x00000058,
-		.agent_id	= {0x01,0x00,0x43,0x43,0x41,0x2D,0x41,0x50,
-				   0x50,0x4C,0x20,0x20,0x20,0x01,0x01,0x01},
-		.function_code	= {'P','D'},
-	};
-	static struct function_and_rules_block static_pkd_function_and_rules ={
-		.function_code	= {'P','D'},
-		.ulen		= cpu_to_le16(10),
-		.only_rule	= {'P','K','C','S','-','1','.','2'}
-	};
-	struct {
-		struct type6_hdr hdr;
-		struct CPRB cprb;
-		struct function_and_rules_block fr;
-		unsigned short length;
-		char text[0];
-	} __attribute__((packed)) *msg = ap_msg->message;
-	int size;
-
-	/* VUD.ciphertext */
-	msg->length = cpu_to_le16(2 + crt->inputdatalength);
-	if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength))
-		return -EFAULT;
-
-	if (is_PKCS11_padded(msg->text, crt->inputdatalength))
-		return -EINVAL;
-
-	/* Set up key after the variable length text. */
-	size = zcrypt_type6_crt_key(crt, msg->text + crt->inputdatalength, 0);
-	if (size < 0)
-		return size;
-	size += sizeof(*msg) + crt->inputdatalength;	/* total size of msg */
-
-	/* message header, cprb and f&r */
-	msg->hdr = static_type6_hdr;
-	msg->hdr.ToCardLen1 = (size -  sizeof(msg->hdr) + 3) & -4;
-	msg->hdr.FromCardLen1 = PCICC_MAX_RESPONSE_SIZE - sizeof(msg->hdr);
-
-	msg->cprb = static_cprb;
-	msg->cprb.usage_domain[0] = AP_QID_QUEUE(zdev->ap_dev->qid);
-	msg->cprb.req_parml = msg->cprb.rpl_parml =
-		cpu_to_le16(size - sizeof(msg->hdr) - sizeof(msg->cprb));
-
-	msg->fr = static_pkd_function_and_rules;
-
-	ap_msg->length = (size + 3) & -4;
-	return 0;
-}
-
-/**
- * Copy results from a type 86 reply message back to user space.
- *
- * @zdev: crypto device pointer
- * @reply: reply AP message.
- * @data: pointer to user output data
- * @length: size of user output data
- *
- * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error.
- */
-struct type86_reply {
-	struct type86_hdr hdr;
-	struct type86_fmt2_ext fmt2;
-	struct CPRB cprb;
-	unsigned char pad[4];	/* 4 byte function code/rules block ? */
-	unsigned short length;
-	char text[0];
-} __attribute__((packed));
-
-static int convert_type86(struct zcrypt_device *zdev,
-			  struct ap_message *reply,
-			  char __user *outputdata,
-			  unsigned int outputdatalength)
-{
-	static unsigned char static_pad[] = {
-		0x00,0x02,
-		0x1B,0x7B,0x5D,0xB5,0x75,0x01,0x3D,0xFD,
-		0x8D,0xD1,0xC7,0x03,0x2D,0x09,0x23,0x57,
-		0x89,0x49,0xB9,0x3F,0xBB,0x99,0x41,0x5B,
-		0x75,0x21,0x7B,0x9D,0x3B,0x6B,0x51,0x39,
-		0xBB,0x0D,0x35,0xB9,0x89,0x0F,0x93,0xA5,
-		0x0B,0x47,0xF1,0xD3,0xBB,0xCB,0xF1,0x9D,
-		0x23,0x73,0x71,0xFF,0xF3,0xF5,0x45,0xFB,
-		0x61,0x29,0x23,0xFD,0xF1,0x29,0x3F,0x7F,
-		0x17,0xB7,0x1B,0xA9,0x19,0xBD,0x57,0xA9,
-		0xD7,0x95,0xA3,0xCB,0xED,0x1D,0xDB,0x45,
-		0x7D,0x11,0xD1,0x51,0x1B,0xED,0x71,0xE9,
-		0xB1,0xD1,0xAB,0xAB,0x21,0x2B,0x1B,0x9F,
-		0x3B,0x9F,0xF7,0xF7,0xBD,0x63,0xEB,0xAD,
-		0xDF,0xB3,0x6F,0x5B,0xDB,0x8D,0xA9,0x5D,
-		0xE3,0x7D,0x77,0x49,0x47,0xF5,0xA7,0xFD,
-		0xAB,0x2F,0x27,0x35,0x77,0xD3,0x49,0xC9,
-		0x09,0xEB,0xB1,0xF9,0xBF,0x4B,0xCB,0x2B,
-		0xEB,0xEB,0x05,0xFF,0x7D,0xC7,0x91,0x8B,
-		0x09,0x83,0xB9,0xB9,0x69,0x33,0x39,0x6B,
-		0x79,0x75,0x19,0xBF,0xBB,0x07,0x1D,0xBD,
-		0x29,0xBF,0x39,0x95,0x93,0x1D,0x35,0xC7,
-		0xC9,0x4D,0xE5,0x97,0x0B,0x43,0x9B,0xF1,
-		0x16,0x93,0x03,0x1F,0xA5,0xFB,0xDB,0xF3,
-		0x27,0x4F,0x27,0x61,0x05,0x1F,0xB9,0x23,
-		0x2F,0xC3,0x81,0xA9,0x23,0x71,0x55,0x55,
-		0xEB,0xED,0x41,0xE5,0xF3,0x11,0xF1,0x43,
-		0x69,0x03,0xBD,0x0B,0x37,0x0F,0x51,0x8F,
-		0x0B,0xB5,0x89,0x5B,0x67,0xA9,0xD9,0x4F,
-		0x01,0xF9,0x21,0x77,0x37,0x73,0x79,0xC5,
-		0x7F,0x51,0xC1,0xCF,0x97,0xA1,0x75,0xAD,
-		0x35,0x9D,0xD3,0xD3,0xA7,0x9D,0x5D,0x41,
-		0x6F,0x65,0x1B,0xCF,0xA9,0x87,0x91,0x09
-	};
-	struct type86_reply *msg = reply->message;
-	unsigned short service_rc, service_rs;
-	unsigned int reply_len, pad_len;
-	char *data;
-
-	service_rc = le16_to_cpu(msg->cprb.ccp_rtcode);
-	if (unlikely(service_rc != 0)) {
-		service_rs = le16_to_cpu(msg->cprb.ccp_rscode);
-		if (service_rc == 8 && service_rs == 66)
-			return -EINVAL;
-		if (service_rc == 8 && service_rs == 65)
-			return -EINVAL;
-		if (service_rc == 8 && service_rs == 770) {
-			zdev->max_mod_size = PCICC_MAX_MOD_SIZE_OLD;
-			return -EAGAIN;
-		}
-		if (service_rc == 8 && service_rs == 783) {
-			zdev->max_mod_size = PCICC_MAX_MOD_SIZE_OLD;
-			return -EAGAIN;
-		}
-		if (service_rc == 8 && service_rs == 72)
-			return -EINVAL;
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d",
-			       zdev->ap_dev->qid, zdev->online,
-			       msg->hdr.reply_code);
-		return -EAGAIN;	/* repeat the request on a different device. */
-	}
-	data = msg->text;
-	reply_len = le16_to_cpu(msg->length) - 2;
-	if (reply_len > outputdatalength)
-		return -EINVAL;
-	/*
-	 * For all encipher requests, the length of the ciphertext (reply_len)
-	 * will always equal the modulus length. For MEX decipher requests
-	 * the output needs to get padded. Minimum pad size is 10.
-	 *
-	 * Currently, the cases where padding will be added is for:
-	 * - PCIXCC_MCL2 using a CRT form token (since PKD didn't support
-	 *   ZERO-PAD and CRT is only supported for PKD requests)
-	 * - PCICC, always
-	 */
-	pad_len = outputdatalength - reply_len;
-	if (pad_len > 0) {
-		if (pad_len < 10)
-			return -EINVAL;
-		/* 'restore' padding left in the PCICC/PCIXCC card. */
-		if (copy_to_user(outputdata, static_pad, pad_len - 1))
-			return -EFAULT;
-		if (put_user(0, outputdata + pad_len - 1))
-			return -EFAULT;
-	}
-	/* Copy the crypto response to user space. */
-	if (copy_to_user(outputdata + pad_len, data, reply_len))
-		return -EFAULT;
-	return 0;
-}
-
-static int convert_response(struct zcrypt_device *zdev,
-			    struct ap_message *reply,
-			    char __user *outputdata,
-			    unsigned int outputdatalength)
-{
-	struct type86_reply *msg = reply->message;
-
-	/* Response type byte is the second byte in the response. */
-	switch (msg->hdr.type) {
-	case TYPE82_RSP_CODE:
-	case TYPE88_RSP_CODE:
-		return convert_error(zdev, reply);
-	case TYPE86_RSP_CODE:
-		if (msg->hdr.reply_code)
-			return convert_error(zdev, reply);
-		if (msg->cprb.cprb_ver_id == 0x01)
-			return convert_type86(zdev, reply,
-					      outputdata, outputdatalength);
-		/* no break, incorrect cprb version is an unknown response */
-	default: /* Unknown response type, this should NEVER EVER happen */
-		zdev->online = 0;
-		pr_err("Cryptographic device %x failed and was set offline\n",
-		       zdev->ap_dev->qid);
-		ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail",
-			       zdev->ap_dev->qid, zdev->online);
-		return -EAGAIN;	/* repeat the request on a different device. */
-	}
-}
-
-/**
- * This function is called from the AP bus code after a crypto request
- * "msg" has finished with the reply message "reply".
- * It is called from tasklet context.
- * @ap_dev: pointer to the AP device
- * @msg: pointer to the AP message
- * @reply: pointer to the AP reply message
- */
-static void zcrypt_pcicc_receive(struct ap_device *ap_dev,
-				 struct ap_message *msg,
-				 struct ap_message *reply)
-{
-	static struct error_hdr error_reply = {
-		.type = TYPE82_RSP_CODE,
-		.reply_code = REP82_ERROR_MACHINE_FAILURE,
-	};
-	struct type86_reply *t86r;
-	int length;
-
-	/* Copy the reply message to the request message buffer. */
-	if (IS_ERR(reply)) {
-		memcpy(msg->message, &error_reply, sizeof(error_reply));
-		goto out;
-	}
-	t86r = reply->message;
-	if (t86r->hdr.type == TYPE86_RSP_CODE &&
-		 t86r->cprb.cprb_ver_id == 0x01) {
-		length = sizeof(struct type86_reply) + t86r->length - 2;
-		length = min(PCICC_MAX_RESPONSE_SIZE, length);
-		memcpy(msg->message, reply->message, length);
-	} else
-		memcpy(msg->message, reply->message, sizeof error_reply);
-out:
-	complete((struct completion *) msg->private);
-}
-
-static atomic_t zcrypt_step = ATOMIC_INIT(0);
-
-/**
- * The request distributor calls this function if it picked the PCICC
- * device to handle a modexpo request.
- * @zdev: pointer to zcrypt_device structure that identifies the
- *	  PCICC device to the request distributor
- * @mex: pointer to the modexpo request buffer
- */
-static long zcrypt_pcicc_modexpo(struct zcrypt_device *zdev,
-				 struct ica_rsa_modexpo *mex)
-{
-	struct ap_message ap_msg;
-	struct completion work;
-	int rc;
-
-	ap_init_message(&ap_msg);
-	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
-	if (!ap_msg.message)
-		return -ENOMEM;
-	ap_msg.receive = zcrypt_pcicc_receive;
-	ap_msg.length = PAGE_SIZE;
-	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
-				atomic_inc_return(&zcrypt_step);
-	ap_msg.private = &work;
-	rc = ICAMEX_msg_to_type6MEX_msg(zdev, &ap_msg, mex);
-	if (rc)
-		goto out_free;
-	init_completion(&work);
-	ap_queue_message(zdev->ap_dev, &ap_msg);
-	rc = wait_for_completion_interruptible(&work);
-	if (rc == 0)
-		rc = convert_response(zdev, &ap_msg, mex->outputdata,
-				      mex->outputdatalength);
-	else
-		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, &ap_msg);
-out_free:
-	free_page((unsigned long) ap_msg.message);
-	return rc;
-}
-
-/**
- * The request distributor calls this function if it picked the PCICC
- * device to handle a modexpo_crt request.
- * @zdev: pointer to zcrypt_device structure that identifies the
- *	  PCICC device to the request distributor
- * @crt: pointer to the modexpoc_crt request buffer
- */
-static long zcrypt_pcicc_modexpo_crt(struct zcrypt_device *zdev,
-				     struct ica_rsa_modexpo_crt *crt)
-{
-	struct ap_message ap_msg;
-	struct completion work;
-	int rc;
-
-	ap_init_message(&ap_msg);
-	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
-	if (!ap_msg.message)
-		return -ENOMEM;
-	ap_msg.receive = zcrypt_pcicc_receive;
-	ap_msg.length = PAGE_SIZE;
-	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
-				atomic_inc_return(&zcrypt_step);
-	ap_msg.private = &work;
-	rc = ICACRT_msg_to_type6CRT_msg(zdev, &ap_msg, crt);
-	if (rc)
-		goto out_free;
-	init_completion(&work);
-	ap_queue_message(zdev->ap_dev, &ap_msg);
-	rc = wait_for_completion_interruptible(&work);
-	if (rc == 0)
-		rc = convert_response(zdev, &ap_msg, crt->outputdata,
-				      crt->outputdatalength);
-	else
-		/* Signal pending. */
-		ap_cancel_message(zdev->ap_dev, &ap_msg);
-out_free:
-	free_page((unsigned long) ap_msg.message);
-	return rc;
-}
-
-/**
- * The crypto operations for a PCICC card.
- */
-static struct zcrypt_ops zcrypt_pcicc_ops = {
-	.rsa_modexpo = zcrypt_pcicc_modexpo,
-	.rsa_modexpo_crt = zcrypt_pcicc_modexpo_crt,
-};
-
-/**
- * Probe function for PCICC cards. It always accepts the AP device
- * since the bus_match already checked the hardware type.
- * @ap_dev: pointer to the AP device.
- */
-static int zcrypt_pcicc_probe(struct ap_device *ap_dev)
-{
-	struct zcrypt_device *zdev;
-	int rc;
-
-	zdev = zcrypt_device_alloc(PCICC_MAX_RESPONSE_SIZE);
-	if (!zdev)
-		return -ENOMEM;
-	zdev->ap_dev = ap_dev;
-	zdev->ops = &zcrypt_pcicc_ops;
-	zdev->online = 1;
-	zdev->user_space_type = ZCRYPT_PCICC;
-	zdev->type_string = "PCICC";
-	zdev->min_mod_size = PCICC_MIN_MOD_SIZE;
-	zdev->max_mod_size = PCICC_MAX_MOD_SIZE;
-	zdev->speed_rating = PCICC_SPEED_RATING;
-	zdev->max_exp_bit_length = PCICC_MAX_MOD_SIZE;
-	ap_dev->reply = &zdev->reply;
-	ap_dev->private = zdev;
-	rc = zcrypt_device_register(zdev);
-	if (rc)
-		goto out_free;
-	return 0;
-
- out_free:
-	ap_dev->private = NULL;
-	zcrypt_device_free(zdev);
-	return rc;
-}
-
-/**
- * This is called to remove the extended PCICC driver information
- * if an AP device is removed.
- */
-static void zcrypt_pcicc_remove(struct ap_device *ap_dev)
-{
-	struct zcrypt_device *zdev = ap_dev->private;
-
-	zcrypt_device_unregister(zdev);
-}
-
-int __init zcrypt_pcicc_init(void)
-{
-	return ap_driver_register(&zcrypt_pcicc_driver, THIS_MODULE, "pcicc");
-}
-
-void zcrypt_pcicc_exit(void)
-{
-	ap_driver_unregister(&zcrypt_pcicc_driver);
-}
-
-module_init(zcrypt_pcicc_init);
-module_exit(zcrypt_pcicc_exit);
diff --git a/drivers/s390/crypto/zcrypt_pcicc.h b/drivers/s390/crypto/zcrypt_pcicc.h
deleted file mode 100644
index 7fe27e1..0000000
--- a/drivers/s390/crypto/zcrypt_pcicc.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- *  zcrypt 2.1.0
- *
- *  Copyright IBM Corp. 2001, 2006
- *  Author(s): Robert Burroughs
- *	       Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _ZCRYPT_PCICC_H_
-#define _ZCRYPT_PCICC_H_
-
-/**
- * The type 6 message family is associated with PCICC or PCIXCC cards.
- *
- * It contains a message header followed by a CPRB, both of which
- * are described below.
- *
- * Note that all reserved fields must be zeroes.
- */
-struct type6_hdr {
-	unsigned char reserved1;	/* 0x00				*/
-	unsigned char type;		/* 0x06				*/
-	unsigned char reserved2[2];	/* 0x0000			*/
-	unsigned char right[4];		/* 0x00000000			*/
-	unsigned char reserved3[2];	/* 0x0000			*/
-	unsigned char reserved4[2];	/* 0x0000			*/
-	unsigned char apfs[4];		/* 0x00000000			*/
-	unsigned int  offset1;		/* 0x00000058 (offset to CPRB)	*/
-	unsigned int  offset2;		/* 0x00000000			*/
-	unsigned int  offset3;		/* 0x00000000			*/
-	unsigned int  offset4;		/* 0x00000000			*/
-	unsigned char agent_id[16];	/* PCICC:			*/
-					/*    0x0100			*/
-					/*    0x4343412d4150504c202020	*/
-					/*    0x010101			*/
-					/* PCIXCC:			*/
-					/*    0x4341000000000000	*/
-					/*    0x0000000000000000	*/
-	unsigned char rqid[2];		/* rqid.  internal to 603	*/
-	unsigned char reserved5[2];	/* 0x0000			*/
-	unsigned char function_code[2];	/* for PKD, 0x5044 (ascii 'PD')	*/
-	unsigned char reserved6[2];	/* 0x0000			*/
-	unsigned int  ToCardLen1;	/* (request CPRB len + 3) & -4	*/
-	unsigned int  ToCardLen2;	/* db len 0x00000000 for PKD	*/
-	unsigned int  ToCardLen3;	/* 0x00000000			*/
-	unsigned int  ToCardLen4;	/* 0x00000000			*/
-	unsigned int  FromCardLen1;	/* response buffer length	*/
-	unsigned int  FromCardLen2;	/* db len 0x00000000 for PKD	*/
-	unsigned int  FromCardLen3;	/* 0x00000000			*/
-	unsigned int  FromCardLen4;	/* 0x00000000			*/
-} __attribute__((packed));
-
-/**
- * CPRB
- *	  Note that all shorts, ints and longs are little-endian.
- *	  All pointer fields are 32-bits long, and mean nothing
- *
- *	  A request CPRB is followed by a request_parameter_block.
- *
- *	  The request (or reply) parameter block is organized thus:
- *	    function code
- *	    VUD block
- *	    key block
- */
-struct CPRB {
-	unsigned short cprb_len;	/* CPRB length			 */
-	unsigned char cprb_ver_id;	/* CPRB version id.		 */
-	unsigned char pad_000;		/* Alignment pad byte.		 */
-	unsigned char srpi_rtcode[4];	/* SRPI return code LELONG	 */
-	unsigned char srpi_verb;	/* SRPI verb type		 */
-	unsigned char flags;		/* flags			 */
-	unsigned char func_id[2];	/* function id			 */
-	unsigned char checkpoint_flag;	/*				 */
-	unsigned char resv2;		/* reserved			 */
-	unsigned short req_parml;	/* request parameter buffer	 */
-					/* length 16-bit little endian	 */
-	unsigned char req_parmp[4];	/* request parameter buffer	 *
-					 * pointer (means nothing: the	 *
-					 * parameter buffer follows	 *
-					 * the CPRB).			 */
-	unsigned char req_datal[4];	/* request data buffer		 */
-					/* length	  ULELONG	 */
-	unsigned char req_datap[4];	/* request data buffer		 */
-					/* pointer			 */
-	unsigned short rpl_parml;	/* reply  parameter buffer	 */
-					/* length 16-bit little endian	 */
-	unsigned char pad_001[2];	/* Alignment pad bytes. ULESHORT */
-	unsigned char rpl_parmp[4];	/* reply parameter buffer	 *
-					 * pointer (means nothing: the	 *
-					 * parameter buffer follows	 *
-					 * the CPRB).			 */
-	unsigned char rpl_datal[4];	/* reply data buffer len ULELONG */
-	unsigned char rpl_datap[4];	/* reply data buffer		 */
-					/* pointer			 */
-	unsigned short ccp_rscode;	/* server reason code	ULESHORT */
-	unsigned short ccp_rtcode;	/* server return code	ULESHORT */
-	unsigned char repd_parml[2];	/* replied parameter len ULESHORT*/
-	unsigned char mac_data_len[2];	/* Mac Data Length	ULESHORT */
-	unsigned char repd_datal[4];	/* replied data length	ULELONG	 */
-	unsigned char req_pc[2];	/* PC identifier		 */
-	unsigned char res_origin[8];	/* resource origin		 */
-	unsigned char mac_value[8];	/* Mac Value			 */
-	unsigned char logon_id[8];	/* Logon Identifier		 */
-	unsigned char usage_domain[2];	/* cdx				 */
-	unsigned char resv3[18];	/* reserved for requestor	 */
-	unsigned short svr_namel;	/* server name length  ULESHORT	 */
-	unsigned char svr_name[8];	/* server name			 */
-} __attribute__((packed));
-
-/**
- * The type 86 message family is associated with PCICC and PCIXCC cards.
- *
- * It contains a message header followed by a CPRB.  The CPRB is
- * the same as the request CPRB, which is described above.
- *
- * If format is 1, an error condition exists and no data beyond
- * the 8-byte message header is of interest.
- *
- * The non-error message is shown below.
- *
- * Note that all reserved fields must be zeroes.
- */
-struct type86_hdr {
-	unsigned char reserved1;	/* 0x00				*/
-	unsigned char type;		/* 0x86				*/
-	unsigned char format;		/* 0x01 (error) or 0x02 (ok)	*/
-	unsigned char reserved2;	/* 0x00				*/
-	unsigned char reply_code;	/* reply code (see above)	*/
-	unsigned char reserved3[3];	/* 0x000000			*/
-} __attribute__((packed));
-
-#define TYPE86_RSP_CODE 0x86
-#define TYPE86_FMT2	0x02
-
-struct type86_fmt2_ext {
-	unsigned char	  reserved[4];	/* 0x00000000			*/
-	unsigned char	  apfs[4];	/* final status			*/
-	unsigned int	  count1;	/* length of CPRB + parameters	*/
-	unsigned int	  offset1;	/* offset to CPRB		*/
-	unsigned int	  count2;	/* 0x00000000			*/
-	unsigned int	  offset2;	/* db offset 0x00000000 for PKD	*/
-	unsigned int	  count3;	/* 0x00000000			*/
-	unsigned int	  offset3;	/* 0x00000000			*/
-	unsigned int	  count4;	/* 0x00000000			*/
-	unsigned int	  offset4;	/* 0x00000000			*/
-} __attribute__((packed));
-
-struct function_and_rules_block {
-	unsigned char function_code[2];
-	unsigned short ulen;
-	unsigned char only_rule[8];
-} __attribute__((packed));
-
-int zcrypt_pcicc_init(void);
-void zcrypt_pcicc_exit(void);
-
-#endif /* _ZCRYPT_PCICC_H_ */
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index e9fae30..b2a1a81 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -28,6 +28,7 @@
 #include <linux/io.h>
 #include <linux/kvm_para.h>
 #include <linux/notifier.h>
+#include <asm/diag.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
 #include <asm/cio.h>
@@ -366,9 +367,9 @@
 	kfree(thinint_area);
 }
 
-static inline long do_kvm_notify(struct subchannel_id schid,
-				 unsigned long queue_index,
-				 long cookie)
+static inline long __do_kvm_notify(struct subchannel_id schid,
+				   unsigned long queue_index,
+				   long cookie)
 {
 	register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
 	register struct subchannel_id __schid asm("2") = schid;
@@ -383,6 +384,14 @@
 	return __rc;
 }
 
+static inline long do_kvm_notify(struct subchannel_id schid,
+				 unsigned long queue_index,
+				 long cookie)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __do_kvm_notify(schid, queue_index, cookie);
+}
+
 static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
 {
 	struct virtio_ccw_vq_info *info = vq->priv;
diff --git a/drivers/watchdog/diag288_wdt.c b/drivers/watchdog/diag288_wdt.c
index a9a5210..3db9d0e 100644
--- a/drivers/watchdog/diag288_wdt.c
+++ b/drivers/watchdog/diag288_wdt.c
@@ -29,6 +29,7 @@
 #include <linux/watchdog.h>
 #include <linux/suspend.h>
 #include <asm/ebcdic.h>
+#include <asm/diag.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
 
@@ -94,12 +95,14 @@
 static int __diag288_vm(unsigned int  func, unsigned int timeout,
 			char *cmd, size_t len)
 {
+	diag_stat_inc(DIAG_STAT_X288);
 	return __diag288(func, timeout, virt_to_phys(cmd), len);
 }
 
 static int __diag288_lpar(unsigned int func, unsigned int timeout,
 			  unsigned long action)
 {
+	diag_stat_inc(DIAG_STAT_X288);
 	return __diag288(func, timeout, action, 0);
 }
 
@@ -141,6 +144,7 @@
 {
 	int ret;
 
+	diag_stat_inc(DIAG_STAT_X288);
 	ret = __diag288(WDT_FUNC_CANCEL, 0, 0, 0);
 	return ret;
 }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e2d46ad..b029d42 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -754,7 +754,7 @@
 
 	if (pte_present(ptent)) {
 		ptent = pte_wrprotect(ptent);
-		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+		ptent = pte_clear_soft_dirty(ptent);
 	} else if (is_swap_pte(ptent)) {
 		ptent = pte_swp_clear_soft_dirty(ptent);
 	}
@@ -768,7 +768,7 @@
 	pmd_t pmd = *pmdp;
 
 	pmd = pmd_wrprotect(pmd);
-	pmd = pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
+	pmd = pmd_clear_soft_dirty(pmd);
 
 	if (vma->vm_flags & VM_SOFTDIRTY)
 		vma->vm_flags &= ~VM_SOFTDIRTY;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 3eabbbb..14b0ff32 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -505,6 +505,16 @@
 	return pmd;
 }
 
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	return pte;
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+	return pmd;
+}
+
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 {
 	return pte;
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index fe81743..52a459f 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -56,7 +56,7 @@
 #include <linux/compiler-gcc.h>
 #endif
 
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
 #define notrace __attribute__((hotpatch(0,0)))
 #else
 #define notrace __attribute__((no_instrument_function))