[S390] Inline assembly cleanup.

Major cleanup of all s390 inline assemblies. They now have a common
coding style. Quite a few have been shortened, mainly by using register
asm variables. Use of the EX_TABLE macro helps as well. The atomic ops,
bit ops and locking inlines now use the Q-constraint if a newer gcc
is used. That results in slightly better code.

Thanks to Christian Borntraeger for proofreading the changes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/include/asm-s390/appldata.h b/include/asm-s390/appldata.h
index b177070..79283da 100644
--- a/include/asm-s390/appldata.h
+++ b/include/asm-s390/appldata.h
@@ -80,7 +80,7 @@
 	parm_list.product_id_addr = (unsigned long) id;
 	parm_list.buffer_addr = virt_to_phys(buffer);
 	asm volatile(
-		"diag %1,%0,0xdc"
+		"	diag	%1,%0,0xdc"
 		: "=d" (ry)
 		: "d" (&parm_list), "m" (parm_list), "m" (*id)
 		: "cc");
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index 399bf02..af20c74 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -30,20 +30,43 @@
 
 #ifdef __KERNEL__
 
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
 #define __CS_LOOP(ptr, op_val, op_string) ({				\
 	typeof(ptr->counter) old_val, new_val;				\
-        __asm__ __volatile__("   l     %0,0(%3)\n"			\
-                             "0: lr    %1,%0\n"				\
-                             op_string "  %1,%4\n"			\
-                             "   cs    %0,%1,0(%3)\n"			\
-                             "   jl    0b"				\
-                             : "=&d" (old_val), "=&d" (new_val),	\
-			       "=m" (((atomic_t *)(ptr))->counter)	\
-			     : "a" (ptr), "d" (op_val),			\
-			       "m" (((atomic_t *)(ptr))->counter)	\
-			     : "cc", "memory" );			\
+	asm volatile(							\
+		"	l	%0,%2\n"				\
+		"0:	lr	%1,%0\n"				\
+		op_string "	%1,%3\n"				\
+		"	cs	%0,%1,%2\n"				\
+		"	jl	0b"					\
+		: "=&d" (old_val), "=&d" (new_val),			\
+		  "=Q" (((atomic_t *)(ptr))->counter)			\
+		: "d" (op_val),	 "Q" (((atomic_t *)(ptr))->counter)	\
+		: "cc", "memory");					\
 	new_val;							\
 })
+
+#else /* __GNUC__ */
+
+#define __CS_LOOP(ptr, op_val, op_string) ({				\
+	typeof(ptr->counter) old_val, new_val;				\
+	asm volatile(							\
+		"	l	%0,0(%3)\n"				\
+		"0:	lr	%1,%0\n"				\
+		op_string "	%1,%4\n"				\
+		"	cs	%0,%1,0(%3)\n"				\
+		"	jl	0b"					\
+		: "=&d" (old_val), "=&d" (new_val),			\
+		  "=m" (((atomic_t *)(ptr))->counter)			\
+		: "a" (ptr), "d" (op_val),				\
+		  "m" (((atomic_t *)(ptr))->counter)			\
+		: "cc", "memory");					\
+	new_val;							\
+})
+
+#endif /* __GNUC__ */
+
 #define atomic_read(v)          ((v)->counter)
 #define atomic_set(v,i)         (((v)->counter) = (i))
 
@@ -81,10 +104,19 @@
 
 static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
-	__asm__ __volatile__("  cs   %0,%3,0(%2)\n"
-			     : "+d" (old), "=m" (v->counter)
-			     : "a" (v), "d" (new), "m" (v->counter)
-			     : "cc", "memory" );
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+	asm volatile(
+		"	cs	%0,%2,%1"
+		: "+d" (old), "=Q" (v->counter)
+		: "d" (new), "Q" (v->counter)
+		: "cc", "memory");
+#else /* __GNUC__ */
+	asm volatile(
+		"	cs	%0,%3,0(%2)"
+		: "+d" (old), "=m" (v->counter)
+		: "a" (v), "d" (new), "m" (v->counter)
+		: "cc", "memory");
+#endif /* __GNUC__ */
 	return old;
 }
 
@@ -113,20 +145,43 @@
 } __attribute__ ((aligned (8))) atomic64_t;
 #define ATOMIC64_INIT(i)  { (i) }
 
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
 #define __CSG_LOOP(ptr, op_val, op_string) ({				\
 	typeof(ptr->counter) old_val, new_val;				\
-        __asm__ __volatile__("   lg    %0,0(%3)\n"			\
-                             "0: lgr   %1,%0\n"				\
-                             op_string "  %1,%4\n"			\
-                             "   csg   %0,%1,0(%3)\n"			\
-                             "   jl    0b"				\
-                             : "=&d" (old_val), "=&d" (new_val),	\
-			       "=m" (((atomic_t *)(ptr))->counter)	\
-			     : "a" (ptr), "d" (op_val),			\
-			       "m" (((atomic_t *)(ptr))->counter)	\
-			     : "cc", "memory" );			\
+	asm volatile(							\
+		"	lg	%0,%2\n"				\
+		"0:	lgr	%1,%0\n"				\
+		op_string "	%1,%3\n"				\
+		"	csg	%0,%1,%2\n"				\
+		"	jl	0b"					\
+		: "=&d" (old_val), "=&d" (new_val),			\
+		  "=Q" (((atomic_t *)(ptr))->counter)			\
+		: "d" (op_val),	"Q" (((atomic_t *)(ptr))->counter)	\
+		: "cc", "memory" );					\
 	new_val;							\
 })
+
+#else /* __GNUC__ */
+
+#define __CSG_LOOP(ptr, op_val, op_string) ({				\
+	typeof(ptr->counter) old_val, new_val;				\
+	asm volatile(							\
+		"	lg	%0,0(%3)\n"				\
+		"0:	lgr	%1,%0\n"				\
+		op_string "	%1,%4\n"				\
+		"	csg	%0,%1,0(%3)\n"				\
+		"	jl	0b"					\
+		: "=&d" (old_val), "=&d" (new_val),			\
+		  "=m" (((atomic_t *)(ptr))->counter)			\
+		: "a" (ptr), "d" (op_val),				\
+		  "m" (((atomic_t *)(ptr))->counter)			\
+		: "cc", "memory" );					\
+	new_val;							\
+})
+
+#endif /* __GNUC__ */
+
 #define atomic64_read(v)          ((v)->counter)
 #define atomic64_set(v,i)         (((v)->counter) = (i))
 
@@ -163,10 +218,19 @@
 static __inline__ long long atomic64_cmpxchg(atomic64_t *v,
 					     long long old, long long new)
 {
-	__asm__ __volatile__("  csg  %0,%3,0(%2)\n"
-			     : "+d" (old), "=m" (v->counter)
-			     : "a" (v), "d" (new), "m" (v->counter)
-			     : "cc", "memory" );
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+	asm volatile(
+		"	csg	%0,%2,%1"
+		: "+d" (old), "=Q" (v->counter)
+		: "d" (new), "Q" (v->counter)
+		: "cc", "memory");
+#else /* __GNUC__ */
+	asm volatile(
+		"	csg	%0,%3,0(%2)"
+		: "+d" (old), "=m" (v->counter)
+		: "a" (v), "d" (new), "m" (v->counter)
+		: "cc", "memory");
+#endif /* __GNUC__ */
 	return old;
 }
 
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index 0ddcdba..f79c9b7 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -67,16 +67,35 @@
 #define __BITOPS_AND		"nr"
 #define __BITOPS_XOR		"xr"
 
-#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)		\
-	__asm__ __volatile__("   l   %0,0(%4)\n"			\
-			     "0: lr  %1,%0\n"				\
-			     __op_string "  %1,%3\n"			\
-			     "   cs  %0,%1,0(%4)\n"			\
-			     "   jl  0b"				\
-			     : "=&d" (__old), "=&d" (__new),	       	\
-			       "=m" (*(unsigned long *) __addr)		\
-			     : "d" (__val), "a" (__addr),		\
-			       "m" (*(unsigned long *) __addr) : "cc" );
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
+#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)	\
+	asm volatile(						\
+		"	l	%0,%2\n"			\
+		"0:	lr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	cs	%0,%1,%2\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new),			\
+		  "=Q" (*(unsigned long *) __addr)		\
+		: "d" (__val), "Q" (*(unsigned long *) __addr)	\
+		: "cc");
+
+#else /* __GNUC__ */
+
+#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)	\
+	asm volatile(						\
+		"	l	%0,0(%4)\n"			\
+		"0:	lr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	cs	%0,%1,0(%4)\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new),			\
+		  "=m" (*(unsigned long *) __addr)		\
+		: "d" (__val), "a" (__addr),			\
+		  "m" (*(unsigned long *) __addr) : "cc");
+
+#endif /* __GNUC__ */
 
 #else /* __s390x__ */
 
@@ -86,21 +105,41 @@
 #define __BITOPS_AND		"ngr"
 #define __BITOPS_XOR		"xgr"
 
-#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)		\
-	__asm__ __volatile__("   lg  %0,0(%4)\n"			\
-			     "0: lgr %1,%0\n"				\
-			     __op_string "  %1,%3\n"			\
-			     "   csg %0,%1,0(%4)\n"			\
-			     "   jl  0b"				\
-			     : "=&d" (__old), "=&d" (__new),	       	\
-			       "=m" (*(unsigned long *) __addr)		\
-			     : "d" (__val), "a" (__addr),		\
-			       "m" (*(unsigned long *) __addr) : "cc" );
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
+#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)	\
+	asm volatile(						\
+		"	lg	%0,%2\n"			\
+		"0:	lgr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	csg	%0,%1,%2\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new),			\
+		  "=Q" (*(unsigned long *) __addr)		\
+		: "d" (__val), "Q" (*(unsigned long *) __addr)	\
+		: "cc");
+
+#else /* __GNUC__ */
+
+#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)	\
+	asm volatile(						\
+		"	lg	%0,0(%4)\n"			\
+		"0:	lgr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	csg	%0,%1,0(%4)\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new),			\
+		  "=m" (*(unsigned long *) __addr)		\
+		: "d" (__val), "a" (__addr),			\
+		  "m" (*(unsigned long *) __addr) : "cc");
+
+
+#endif /* __GNUC__ */
 
 #endif /* __s390x__ */
 
 #define __BITOPS_WORDS(bits) (((bits)+__BITOPS_WORDSIZE-1)/__BITOPS_WORDSIZE)
-#define __BITOPS_BARRIER() __asm__ __volatile__ ( "" : : : "memory" )
+#define __BITOPS_BARRIER() asm volatile("" : : : "memory")
 
 #ifdef CONFIG_SMP
 /*
@@ -217,10 +256,10 @@
 	unsigned long addr;
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-        asm volatile("oc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_oi_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc" );
+	asm volatile(
+		"	oc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr) : "a" (addr),
+		  "a" (_oi_bitmap + (nr & 7)), "m" (*(char *) addr) : "cc" );
 }
 
 static inline void 
@@ -229,40 +268,7 @@
 	unsigned long addr;
 
 	addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-	switch (nr&7) {
-	case 0:
-		asm volatile ("oi 0(%1),0x01" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 1:
-		asm volatile ("oi 0(%1),0x02" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 2:
-		asm volatile ("oi 0(%1),0x04" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 3:
-		asm volatile ("oi 0(%1),0x08" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 4:
-		asm volatile ("oi 0(%1),0x10" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 5:
-		asm volatile ("oi 0(%1),0x20" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 6:
-		asm volatile ("oi 0(%1),0x40" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 7:
-		asm volatile ("oi 0(%1),0x80" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	}
+	*(unsigned char *) addr |= 1 << (nr & 7);
 }
 
 #define set_bit_simple(nr,addr) \
@@ -279,10 +285,10 @@
 	unsigned long addr;
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-        asm volatile("nc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_ni_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc" );
+	asm volatile(
+		"	nc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr)	: "a" (addr),
+		  "a" (_ni_bitmap + (nr & 7)), "m" (*(char *) addr) : "cc");
 }
 
 static inline void 
@@ -291,40 +297,7 @@
 	unsigned long addr;
 
 	addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-	switch (nr&7) {
-	case 0:
-		asm volatile ("ni 0(%1),0xFE" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 1:
-		asm volatile ("ni 0(%1),0xFD": "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 2:
-		asm volatile ("ni 0(%1),0xFB" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 3:
-		asm volatile ("ni 0(%1),0xF7" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 4:
-		asm volatile ("ni 0(%1),0xEF" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 5:
-		asm volatile ("ni 0(%1),0xDF" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 6:
-		asm volatile ("ni 0(%1),0xBF" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 7:
-		asm volatile ("ni 0(%1),0x7F" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	}
+	*(unsigned char *) addr &= ~(1 << (nr & 7));
 }
 
 #define clear_bit_simple(nr,addr) \
@@ -340,10 +313,10 @@
 	unsigned long addr;
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-        asm volatile("xc 0(1,%1),0(%2)"
-		     :  "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_oi_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc" );
+	asm volatile(
+		"	xc	0(1,%1),0(%2)"
+		:  "=m" (*(char *) addr) : "a" (addr),
+		   "a" (_oi_bitmap + (nr & 7)), "m" (*(char *) addr) : "cc" );
 }
 
 static inline void 
@@ -352,40 +325,7 @@
 	unsigned long addr;
 
 	addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-	switch (nr&7) {
-	case 0:
-		asm volatile ("xi 0(%1),0x01" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 1:
-		asm volatile ("xi 0(%1),0x02" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 2:
-		asm volatile ("xi 0(%1),0x04" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 3:
-		asm volatile ("xi 0(%1),0x08" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 4:
-		asm volatile ("xi 0(%1),0x10" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 5:
-		asm volatile ("xi 0(%1),0x20" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 6:
-		asm volatile ("xi 0(%1),0x40" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 7:
-		asm volatile ("xi 0(%1),0x80" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	}
+	*(unsigned char *) addr ^= 1 << (nr & 7);
 }
 
 #define change_bit_simple(nr,addr) \
@@ -404,10 +344,11 @@
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
 	ch = *(unsigned char *) addr;
-        asm volatile("oc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_oi_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc", "memory" );
+	asm volatile(
+		"	oc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr)
+		: "a" (addr), "a" (_oi_bitmap + (nr & 7)),
+		  "m" (*(char *) addr) : "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
 #define __test_and_set_bit(X,Y)		test_and_set_bit_simple(X,Y)
@@ -423,10 +364,11 @@
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
 	ch = *(unsigned char *) addr;
-        asm volatile("nc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_ni_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc", "memory" );
+	asm volatile(
+		"	nc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr)
+		: "a" (addr), "a" (_ni_bitmap + (nr & 7)),
+		  "m" (*(char *) addr) : "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
 #define __test_and_clear_bit(X,Y)	test_and_clear_bit_simple(X,Y)
@@ -442,10 +384,11 @@
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
 	ch = *(unsigned char *) addr;
-        asm volatile("xc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_oi_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc", "memory" );
+	asm volatile(
+		"	xc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr)
+		: "a" (addr), "a" (_oi_bitmap + (nr & 7)),
+		  "m" (*(char *) addr) : "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
 #define __test_and_change_bit(X,Y)	test_and_change_bit_simple(X,Y)
@@ -557,35 +500,36 @@
 
         if (!size)
                 return 0;
-        __asm__("   lhi  %1,-1\n"
-                "   lr   %2,%3\n"
-                "   slr  %0,%0\n"
-                "   ahi  %2,31\n"
-                "   srl  %2,5\n"
-                "0: c    %1,0(%0,%4)\n"
-                "   jne  1f\n"
-                "   la   %0,4(%0)\n"
-                "   brct %2,0b\n"
-                "   lr   %0,%3\n"
-                "   j    4f\n"
-                "1: l    %2,0(%0,%4)\n"
-                "   sll  %0,3\n"
-                "   lhi  %1,0xff\n"
-                "   tml  %2,0xffff\n"
-                "   jno  2f\n"
-                "   ahi  %0,16\n"
-                "   srl  %2,16\n"
-                "2: tml  %2,0x00ff\n"
-                "   jno  3f\n"
-                "   ahi  %0,8\n"
-                "   srl  %2,8\n"
-                "3: nr   %2,%1\n"
-                "   ic   %2,0(%2,%5)\n"
-                "   alr  %0,%2\n"
-                "4:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
-                : "a" (size), "a" (addr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) addr) : "cc" );
+	asm volatile(
+		"	lhi	%1,-1\n"
+		"	lr	%2,%3\n"
+		"	slr	%0,%0\n"
+		"	ahi	%2,31\n"
+		"	srl	%2,5\n"
+		"0:	c	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	la	%0,4(%0)\n"
+		"	brct	%2,0b\n"
+		"	lr	%0,%3\n"
+		"	j	4f\n"
+		"1:	l	%2,0(%0,%4)\n"
+		"	sll	%0,3\n"
+		"	lhi	%1,0xff\n"
+		"	tml	%2,0xffff\n"
+		"	jno	2f\n"
+		"	ahi	%0,16\n"
+		"	srl	%2,16\n"
+		"2:	tml	%2,0x00ff\n"
+		"	jno	3f\n"
+		"	ahi	%0,8\n"
+		"	srl	%2,8\n"
+		"3:	nr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	alr	%0,%2\n"
+		"4:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
+		: "a" (size), "a" (addr), "a" (&_zb_findmap),
+		  "m" (*(addrtype *) addr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -598,35 +542,36 @@
 
         if (!size)
                 return 0;
-        __asm__("   slr  %1,%1\n"
-                "   lr   %2,%3\n"
-                "   slr  %0,%0\n"
-                "   ahi  %2,31\n"
-                "   srl  %2,5\n"
-                "0: c    %1,0(%0,%4)\n"
-                "   jne  1f\n"
-                "   la   %0,4(%0)\n"
-                "   brct %2,0b\n"
-                "   lr   %0,%3\n"
-                "   j    4f\n"
-                "1: l    %2,0(%0,%4)\n"
-                "   sll  %0,3\n"
-                "   lhi  %1,0xff\n"
-                "   tml  %2,0xffff\n"
-                "   jnz  2f\n"
-                "   ahi  %0,16\n"
-                "   srl  %2,16\n"
-                "2: tml  %2,0x00ff\n"
-                "   jnz  3f\n"
-                "   ahi  %0,8\n"
-                "   srl  %2,8\n"
-                "3: nr   %2,%1\n"
-                "   ic   %2,0(%2,%5)\n"
-                "   alr  %0,%2\n"
-                "4:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
-                : "a" (size), "a" (addr), "a" (&_sb_findmap),
-		  "m" (*(addrtype *) addr) : "cc" );
+	asm volatile(
+		"	slr	%1,%1\n"
+		"	lr	%2,%3\n"
+		"	slr	%0,%0\n"
+		"	ahi	%2,31\n"
+		"	srl	%2,5\n"
+		"0:	c	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	la	%0,4(%0)\n"
+		"	brct	%2,0b\n"
+		"	lr	%0,%3\n"
+		"	j	4f\n"
+		"1:	l	%2,0(%0,%4)\n"
+		"	sll	%0,3\n"
+		"	lhi	%1,0xff\n"
+		"	tml	%2,0xffff\n"
+		"	jnz	2f\n"
+		"	ahi	%0,16\n"
+		"	srl	%2,16\n"
+		"2:	tml	%2,0x00ff\n"
+		"	jnz	3f\n"
+		"	ahi	%0,8\n"
+		"	srl	%2,8\n"
+		"3:	nr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	alr	%0,%2\n"
+		"4:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
+		: "a" (size), "a" (addr), "a" (&_sb_findmap),
+		  "m" (*(addrtype *) addr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -640,39 +585,40 @@
 
         if (!size)
                 return 0;
-        __asm__("   lghi  %1,-1\n"
-                "   lgr   %2,%3\n"
-                "   slgr  %0,%0\n"
-                "   aghi  %2,63\n"
-                "   srlg  %2,%2,6\n"
-                "0: cg    %1,0(%0,%4)\n"
-                "   jne   1f\n"
-                "   la    %0,8(%0)\n"
-                "   brct  %2,0b\n"
-                "   lgr   %0,%3\n"
-                "   j     5f\n"
-                "1: lg    %2,0(%0,%4)\n"
-                "   sllg  %0,%0,3\n"
-                "   clr   %2,%1\n"
-		"   jne   2f\n"
-		"   aghi  %0,32\n"
-                "   srlg  %2,%2,32\n"
-		"2: lghi  %1,0xff\n"
-                "   tmll  %2,0xffff\n"
-                "   jno   3f\n"
-                "   aghi  %0,16\n"
-                "   srl   %2,16\n"
-                "3: tmll  %2,0x00ff\n"
-                "   jno   4f\n"
-                "   aghi  %0,8\n"
-                "   srl   %2,8\n"
-                "4: ngr   %2,%1\n"
-                "   ic    %2,0(%2,%5)\n"
-                "   algr  %0,%2\n"
-                "5:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
+	asm volatile(
+		"	lghi	%1,-1\n"
+		"	lgr	%2,%3\n"
+		"	slgr	%0,%0\n"
+		"	aghi	%2,63\n"
+		"	srlg	%2,%2,6\n"
+		"0:	cg	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	la	%0,8(%0)\n"
+		"	brct	%2,0b\n"
+		"	lgr	%0,%3\n"
+		"	j	5f\n"
+		"1:	lg	%2,0(%0,%4)\n"
+		"	sllg	%0,%0,3\n"
+		"	clr	%2,%1\n"
+		"	jne	2f\n"
+		"	aghi	%0,32\n"
+		"	srlg	%2,%2,32\n"
+		"2:	lghi	%1,0xff\n"
+		"	tmll	%2,0xffff\n"
+		"	jno	3f\n"
+		"	aghi	%0,16\n"
+		"	srl	%2,16\n"
+		"3:	tmll	%2,0x00ff\n"
+		"	jno	4f\n"
+		"	aghi	%0,8\n"
+		"	srl	%2,8\n"
+		"4:	ngr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	algr	%0,%2\n"
+		"5:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
 		: "a" (size), "a" (addr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) addr) : "cc" );
+		  "m" (*(addrtype *) addr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -684,39 +630,40 @@
 
         if (!size)
                 return 0;
-        __asm__("   slgr  %1,%1\n"
-                "   lgr   %2,%3\n"
-                "   slgr  %0,%0\n"
-                "   aghi  %2,63\n"
-                "   srlg  %2,%2,6\n"
-                "0: cg    %1,0(%0,%4)\n"
-                "   jne   1f\n"
-                "   aghi  %0,8\n"
-                "   brct  %2,0b\n"
-                "   lgr   %0,%3\n"
-                "   j     5f\n"
-                "1: lg    %2,0(%0,%4)\n"
-                "   sllg  %0,%0,3\n"
-                "   clr   %2,%1\n"
-		"   jne   2f\n"
-		"   aghi  %0,32\n"
-                "   srlg  %2,%2,32\n"
-		"2: lghi  %1,0xff\n"
-                "   tmll  %2,0xffff\n"
-                "   jnz   3f\n"
-                "   aghi  %0,16\n"
-                "   srl   %2,16\n"
-                "3: tmll  %2,0x00ff\n"
-                "   jnz   4f\n"
-                "   aghi  %0,8\n"
-                "   srl   %2,8\n"
-                "4: ngr   %2,%1\n"
-                "   ic    %2,0(%2,%5)\n"
-                "   algr  %0,%2\n"
-                "5:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
+	asm volatile(
+		"	slgr	%1,%1\n"
+		"	lgr	%2,%3\n"
+		"	slgr	%0,%0\n"
+		"	aghi	%2,63\n"
+		"	srlg	%2,%2,6\n"
+		"0:	cg	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	aghi	%0,8\n"
+		"	brct	%2,0b\n"
+		"	lgr	%0,%3\n"
+		"	j	5f\n"
+		"1:	lg	%2,0(%0,%4)\n"
+		"	sllg	%0,%0,3\n"
+		"	clr	%2,%1\n"
+		"	jne	2f\n"
+		"	aghi	%0,32\n"
+		"	srlg	%2,%2,32\n"
+		"2:	lghi	%1,0xff\n"
+		"	tmll	%2,0xffff\n"
+		"	jnz	3f\n"
+		"	aghi	%0,16\n"
+		"	srl	%2,16\n"
+		"3:	tmll	%2,0x00ff\n"
+		"	jnz	4f\n"
+		"	aghi	%0,8\n"
+		"	srl	%2,8\n"
+		"4:	ngr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	algr	%0,%2\n"
+		"5:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
 		: "a" (size), "a" (addr), "a" (&_sb_findmap),
-		  "m" (*(addrtype *) addr) : "cc" );
+		  "m" (*(addrtype *) addr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -832,36 +779,37 @@
 
         if (!size)
                 return 0;
-        __asm__("   lhi  %1,-1\n"
-                "   lr   %2,%3\n"
-                "   ahi  %2,31\n"
-                "   srl  %2,5\n"
-                "   slr  %0,%0\n"
-                "0: cl   %1,0(%0,%4)\n"
-                "   jne  1f\n"
-                "   ahi  %0,4\n"
-                "   brct %2,0b\n"
-                "   lr   %0,%3\n"
-                "   j    4f\n"
-                "1: l    %2,0(%0,%4)\n"
-                "   sll  %0,3\n"
-                "   ahi  %0,24\n"
-                "   lhi  %1,0xff\n"
-                "   tmh  %2,0xffff\n"
-                "   jo   2f\n"
-                "   ahi  %0,-16\n"
-                "   srl  %2,16\n"
-                "2: tml  %2,0xff00\n"
-                "   jo   3f\n"
-                "   ahi  %0,-8\n"
-                "   srl  %2,8\n"
-                "3: nr   %2,%1\n"
-                "   ic   %2,0(%2,%5)\n"
-                "   alr  %0,%2\n"
-                "4:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
-                : "a" (size), "a" (vaddr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) vaddr) : "cc" );
+	asm volatile(
+		"	lhi	%1,-1\n"
+		"	lr	%2,%3\n"
+		"	ahi	%2,31\n"
+		"	srl	%2,5\n"
+		"	slr	%0,%0\n"
+		"0:	cl	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	ahi	%0,4\n"
+		"	brct	%2,0b\n"
+		"	lr	%0,%3\n"
+		"	j	4f\n"
+		"1:	l	%2,0(%0,%4)\n"
+		"	sll	%0,3\n"
+		"	ahi	%0,24\n"
+		"	lhi	%1,0xff\n"
+		"	tmh	%2,0xffff\n"
+		"	jo	2f\n"
+		"	ahi	%0,-16\n"
+		"	srl	%2,16\n"
+		"2:	tml	%2,0xff00\n"
+		"	jo	3f\n"
+		"	ahi	%0,-8\n"
+		"	srl	%2,8\n"
+		"3:	nr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	alr	%0,%2\n"
+		"4:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
+		: "a" (size), "a" (vaddr), "a" (&_zb_findmap),
+		  "m" (*(addrtype *) vaddr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -875,39 +823,40 @@
 
         if (!size)
                 return 0;
-        __asm__("   lghi  %1,-1\n"
-                "   lgr   %2,%3\n"
-                "   aghi  %2,63\n"
-                "   srlg  %2,%2,6\n"
-                "   slgr  %0,%0\n"
-                "0: clg   %1,0(%0,%4)\n"
-                "   jne   1f\n"
-                "   aghi  %0,8\n"
-                "   brct  %2,0b\n"
-                "   lgr   %0,%3\n"
-                "   j     5f\n"
-                "1: cl    %1,0(%0,%4)\n"
-		"   jne   2f\n"
-		"   aghi  %0,4\n"
-		"2: l     %2,0(%0,%4)\n"
-                "   sllg  %0,%0,3\n"
-                "   aghi  %0,24\n"
-                "   lghi  %1,0xff\n"
-                "   tmlh  %2,0xffff\n"
-                "   jo    3f\n"
-                "   aghi  %0,-16\n"
-                "   srl   %2,16\n"
-                "3: tmll  %2,0xff00\n"
-                "   jo    4f\n"
-                "   aghi  %0,-8\n"
-                "   srl   %2,8\n"
-                "4: ngr   %2,%1\n"
-                "   ic    %2,0(%2,%5)\n"
-                "   algr  %0,%2\n"
-                "5:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
+	asm volatile(
+		"	lghi	%1,-1\n"
+		"	lgr	%2,%3\n"
+		"	aghi	%2,63\n"
+		"	srlg	%2,%2,6\n"
+		"	slgr	%0,%0\n"
+		"0:	clg	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	aghi	%0,8\n"
+		"	brct	%2,0b\n"
+		"	lgr	%0,%3\n"
+		"	j	5f\n"
+		"1:	cl	%1,0(%0,%4)\n"
+		"	jne	2f\n"
+		"	aghi	%0,4\n"
+		"2:	l	%2,0(%0,%4)\n"
+		"	sllg	%0,%0,3\n"
+		"	aghi	%0,24\n"
+		"	lghi	%1,0xff\n"
+		"	tmlh	%2,0xffff\n"
+		"	jo	3f\n"
+		"	aghi	%0,-16\n"
+		"	srl	%2,16\n"
+		"3:	tmll	%2,0xff00\n"
+		"	jo	4f\n"
+		"	aghi	%0,-8\n"
+		"	srl	%2,8\n"
+		"4:	ngr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	algr	%0,%2\n"
+		"5:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
 		: "a" (size), "a" (vaddr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) vaddr) : "cc" );
+		  "m" (*(addrtype *) vaddr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -927,13 +876,16 @@
 	p = addr + offset / __BITOPS_WORDSIZE;
         if (bit) {
 #ifndef __s390x__
-                asm("   ic   %0,0(%1)\n"
-		    "   icm  %0,2,1(%1)\n"
-		    "   icm  %0,4,2(%1)\n"
-		    "   icm  %0,8,3(%1)"
-		    : "=&a" (word) : "a" (p), "m" (*p) : "cc" );
+		asm volatile(
+			"	ic	%0,0(%1)\n"
+			"	icm	%0,2,1(%1)\n"
+			"	icm	%0,4,2(%1)\n"
+			"	icm	%0,8,3(%1)"
+			: "=&a" (word) : "a" (p), "m" (*p) : "cc");
 #else
-                asm("   lrvg %0,%1" : "=a" (word) : "m" (*p) );
+		asm volatile(
+			"	lrvg	%0,%1"
+			: "=a" (word) : "m" (*p) );
 #endif
 		/*
 		 * s390 version of ffz returns __BITOPS_WORDSIZE
diff --git a/include/asm-s390/byteorder.h b/include/asm-s390/byteorder.h
index 2cc35a0..1fe2492 100644
--- a/include/asm-s390/byteorder.h
+++ b/include/asm-s390/byteorder.h
@@ -14,60 +14,54 @@
 #ifdef __GNUC__
 
 #ifdef __s390x__
-static __inline__ __u64 ___arch__swab64p(const __u64 *x)
+static inline __u64 ___arch__swab64p(const __u64 *x)
 {
 	__u64 result;
 
-	__asm__ __volatile__ (
-		"   lrvg %0,%1"
-		: "=d" (result) : "m" (*x) );
+	asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x));
 	return result;
 }
 
-static __inline__ __u64 ___arch__swab64(__u64 x)
+static inline __u64 ___arch__swab64(__u64 x)
 {
 	__u64 result;
 
-	__asm__ __volatile__ (
-		"   lrvgr %0,%1"
-		: "=d" (result) : "d" (x) );
+	asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x));
 	return result;
 }
 
-static __inline__ void ___arch__swab64s(__u64 *x)
+static inline void ___arch__swab64s(__u64 *x)
 {
 	*x = ___arch__swab64p(x);
 }
 #endif /* __s390x__ */
 
-static __inline__ __u32 ___arch__swab32p(const __u32 *x)
+static inline __u32 ___arch__swab32p(const __u32 *x)
 {
 	__u32 result;
 	
-	__asm__ __volatile__ (
+	asm volatile(
 #ifndef __s390x__
-		"        icm   %0,8,3(%1)\n"
-		"        icm   %0,4,2(%1)\n"
-		"        icm   %0,2,1(%1)\n"
-		"        ic    %0,0(%1)"
-		: "=&d" (result) : "a" (x), "m" (*x) : "cc" );
+		"	icm	%0,8,3(%1)\n"
+		"	icm	%0,4,2(%1)\n"
+		"	icm	%0,2,1(%1)\n"
+		"	ic	%0,0(%1)"
+		: "=&d" (result) : "a" (x), "m" (*x) : "cc");
 #else /* __s390x__ */
-		"   lrv  %0,%1"
-		: "=d" (result) : "m" (*x) );
+		"	lrv	%0,%1"
+		: "=d" (result) : "m" (*x));
 #endif /* __s390x__ */
 	return result;
 }
 
-static __inline__ __u32 ___arch__swab32(__u32 x)
+static inline __u32 ___arch__swab32(__u32 x)
 {
 #ifndef __s390x__
 	return ___arch__swab32p(&x);
 #else /* __s390x__ */
 	__u32 result;
 	
-	__asm__ __volatile__ (
-		"   lrvr  %0,%1"
-		: "=d" (result) : "d" (x) );
+	asm volatile("lrvr  %0,%1" : "=d" (result) : "d" (x));
 	return result;
 #endif /* __s390x__ */
 }
@@ -81,14 +75,14 @@
 {
 	__u16 result;
 	
-	__asm__ __volatile__ (
+	asm volatile(
 #ifndef __s390x__
-		"        icm   %0,2,1(%1)\n"
-		"        ic    %0,0(%1)\n"
-		: "=&d" (result) : "a" (x), "m" (*x) : "cc" );
+		"	icm	%0,2,1(%1)\n"
+		"	ic	%0,0(%1)\n"
+		: "=&d" (result) : "a" (x), "m" (*x) : "cc");
 #else /* __s390x__ */
-		"   lrvh %0,%1"
-		: "=d" (result) : "m" (*x) );
+		"	lrvh	%0,%1"
+		: "=d" (result) : "m" (*x));
 #endif /* __s390x__ */
 	return result;
 }
diff --git a/include/asm-s390/checksum.h b/include/asm-s390/checksum.h
index 471f2af..37c362d 100644
--- a/include/asm-s390/checksum.h
+++ b/include/asm-s390/checksum.h
@@ -30,57 +30,13 @@
 static inline unsigned int
 csum_partial(const unsigned char * buff, int len, unsigned int sum)
 {
-	/*
-	 * Experiments with ethernet and slip connections show that buf
-	 * is aligned on either a 2-byte or 4-byte boundary.
-	 */
-#ifndef __s390x__
-	register_pair rp;
+	register unsigned long reg2 asm("2") = (unsigned long) buff;
+	register unsigned long reg3 asm("3") = (unsigned long) len;
 
-	rp.subreg.even = (unsigned long) buff;
-	rp.subreg.odd = (unsigned long) len;
-	__asm__ __volatile__ (
-		"0:  cksm %0,%1\n"	/* do checksum on longs */
-		"    jo   0b\n"
-		: "+&d" (sum), "+&a" (rp) : : "cc", "memory" );
-#else /* __s390x__ */
-        __asm__ __volatile__ (
-                "    lgr  2,%1\n"    /* address in gpr 2 */
-                "    lgfr 3,%2\n"    /* length in gpr 3 */
-                "0:  cksm %0,2\n"    /* do checksum on longs */
-                "    jo   0b\n"
-                : "+&d" (sum)
-                : "d" (buff), "d" (len)
-                : "cc", "memory", "2", "3" );
-#endif /* __s390x__ */
-	return sum;
-}
-
-/*
- * csum_partial as an inline function
- */
-static inline unsigned int 
-csum_partial_inline(const unsigned char * buff, int len, unsigned int sum)
-{
-#ifndef __s390x__
-	register_pair rp;
-
-	rp.subreg.even = (unsigned long) buff;
-	rp.subreg.odd = (unsigned long) len;
-	__asm__ __volatile__ (
-		"0:  cksm %0,%1\n"    /* do checksum on longs */
-		"    jo   0b\n"
-                : "+&d" (sum), "+&a" (rp) : : "cc", "memory" );
-#else /* __s390x__ */
-	__asm__ __volatile__ (
-		"    lgr  2,%1\n"    /* address in gpr 2 */
-		"    lgfr 3,%2\n"    /* length in gpr 3 */
-		"0:  cksm %0,2\n"    /* do checksum on longs */
-		"    jo   0b\n"
-                : "+&d" (sum)
-		: "d" (buff), "d" (len)
-                : "cc", "memory", "2", "3" );
-#endif /* __s390x__ */
+	asm volatile(
+		"0:	cksm	%0,%1\n"	/* do checksum on longs */
+		"	jo	0b\n"
+		: "+d" (sum), "+d" (reg2), "+d" (reg3) : : "cc", "memory");
 	return sum;
 }
 
@@ -114,7 +70,7 @@
 csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum)
 {
         memcpy(dst,src,len);
-        return csum_partial_inline(dst, len, sum);
+	return csum_partial(dst, len, sum);
 }
 
 /*
@@ -126,22 +82,22 @@
 #ifndef __s390x__
 	register_pair rp;
 
-	__asm__ __volatile__ (
-		"    slr  %N1,%N1\n" /* %0 = H L */
-		"    lr   %1,%0\n"   /* %0 = H L, %1 = H L 0 0 */
-		"    srdl %1,16\n"   /* %0 = H L, %1 = 0 H L 0 */
-		"    alr  %1,%N1\n"  /* %0 = H L, %1 = L H L 0 */
-		"    alr  %0,%1\n"   /* %0 = H+L+C L+H */
-		"    srl  %0,16\n"   /* %0 = H+L+C */
-		: "+&d" (sum), "=d" (rp) : : "cc" );
+	asm volatile(
+		"	slr	%N1,%N1\n"	/* %0 = H L */
+		"	lr	%1,%0\n"	/* %0 = H L, %1 = H L 0 0 */
+		"	srdl	%1,16\n"	/* %0 = H L, %1 = 0 H L 0 */
+		"	alr	%1,%N1\n"	/* %0 = H L, %1 = L H L 0 */
+		"	alr	%0,%1\n"	/* %0 = H+L+C L+H */
+		"	srl	%0,16\n"	/* %0 = H+L+C */
+		: "+&d" (sum), "=d" (rp) : : "cc");
 #else /* __s390x__ */
-	__asm__ __volatile__ (
-		"    sr   3,3\n"   /* %0 = H*65536 + L */
-		"    lr   2,%0\n"  /* %0 = H L, R2/R3 = H L / 0 0 */
-		"    srdl 2,16\n"  /* %0 = H L, R2/R3 = 0 H / L 0 */
-		"    alr  2,3\n"   /* %0 = H L, R2/R3 = L H / L 0 */
-		"    alr  %0,2\n"  /* %0 = H+L+C L+H */
-                "    srl  %0,16\n" /* %0 = H+L+C */
+	asm volatile(
+		"	sr	3,3\n"		/* %0 = H*65536 + L */
+		"	lr	2,%0\n"		/* %0 = H L, 2/3 = H L / 0 0 */
+		"	srdl	2,16\n"		/* %0 = H L, 2/3 = 0 H / L 0 */
+		"	alr	2,3\n"		/* %0 = H L, 2/3 = L H / L 0 */
+		"	alr	%0,2\n"		/* %0 = H+L+C L+H */
+		"	srl	%0,16\n"	/* %0 = H+L+C */
 		: "+&d" (sum) : : "cc", "2", "3");
 #endif /* __s390x__ */
 	return ((unsigned short) ~sum);
@@ -155,29 +111,7 @@
 static inline unsigned short
 ip_fast_csum(unsigned char *iph, unsigned int ihl)
 {
-	unsigned long sum;
-#ifndef __s390x__
-	register_pair rp;
-
-	rp.subreg.even = (unsigned long) iph;
-	rp.subreg.odd = (unsigned long) ihl*4;
-        __asm__ __volatile__ (
-		"    sr   %0,%0\n"   /* set sum to zero */
-                "0:  cksm %0,%1\n"   /* do checksum on longs */
-                "    jo   0b\n"
-                : "=&d" (sum), "+&a" (rp) : : "cc", "memory" );
-#else /* __s390x__ */
-        __asm__ __volatile__ (
-		"    slgr %0,%0\n"   /* set sum to zero */
-                "    lgr  2,%1\n"    /* address in gpr 2 */
-                "    lgfr 3,%2\n"    /* length in gpr 3 */
-                "0:  cksm %0,2\n"    /* do checksum on ints */
-                "    jo   0b\n"
-                : "=&d" (sum)
-                : "d" (iph), "d" (ihl*4)
-                : "cc", "memory", "2", "3" );
-#endif /* __s390x__ */
-        return csum_fold(sum);
+	return csum_fold(csum_partial(iph, ihl*4, 0));
 }
 
 /*
@@ -190,47 +124,47 @@
                    unsigned int sum)
 {
 #ifndef __s390x__
-	__asm__ __volatile__ (
-                "    alr   %0,%1\n"  /* sum += saddr */
-                "    brc   12,0f\n"
-		"    ahi   %0,1\n"   /* add carry */
+	asm volatile(
+		"	alr	%0,%1\n" /* sum += saddr */
+		"	brc	12,0f\n"
+		"	ahi	%0,1\n"  /* add carry */
 		"0:"
-		: "+&d" (sum) : "d" (saddr) : "cc" );
-	__asm__ __volatile__ (
-                "    alr   %0,%1\n"  /* sum += daddr */
-                "    brc   12,1f\n"
-                "    ahi   %0,1\n"   /* add carry */
+		: "+&d" (sum) : "d" (saddr) : "cc");
+	asm volatile(
+		"	alr	%0,%1\n" /* sum += daddr */
+		"	brc	12,1f\n"
+		"	ahi	%0,1\n"  /* add carry */
 		"1:"
-		: "+&d" (sum) : "d" (daddr) : "cc" );
-	__asm__ __volatile__ (
-                "    alr   %0,%1\n"  /* sum += (len<<16) + (proto<<8) */
-		"    brc   12,2f\n"
-		"    ahi   %0,1\n"   /* add carry */
+		: "+&d" (sum) : "d" (daddr) : "cc");
+	asm volatile(
+		"	alr	%0,%1\n" /* sum += (len<<16) + (proto<<8) */
+		"	brc	12,2f\n"
+		"	ahi	%0,1\n"  /* add carry */
 		"2:"
 		: "+&d" (sum)
 		: "d" (((unsigned int) len<<16) + (unsigned int) proto)
-		: "cc" );
+		: "cc");
 #else /* __s390x__ */
-	__asm__ __volatile__ (
-                "    lgfr  %0,%0\n"
-                "    algr  %0,%1\n"  /* sum += saddr */
-                "    brc   12,0f\n"
-		"    aghi  %0,1\n"   /* add carry */
-		"0:  algr  %0,%2\n"  /* sum += daddr */
-                "    brc   12,1f\n"
-                "    aghi  %0,1\n"   /* add carry */
-		"1:  algfr %0,%3\n"  /* sum += (len<<16) + proto */
-		"    brc   12,2f\n"
-		"    aghi  %0,1\n"   /* add carry */
-		"2:  srlg  0,%0,32\n"
-                "    alr   %0,0\n"   /* fold to 32 bits */
-                "    brc   12,3f\n"
-                "    ahi   %0,1\n"   /* add carry */
-                "3:  llgfr %0,%0"
+	asm volatile(
+		"	lgfr	%0,%0\n"
+		"	algr	%0,%1\n"  /* sum += saddr */
+		"	brc	12,0f\n"
+		"	aghi	%0,1\n"   /* add carry */
+		"0:	algr	%0,%2\n"  /* sum += daddr */
+		"	brc	12,1f\n"
+		"	aghi	%0,1\n"   /* add carry */
+		"1:	algfr	%0,%3\n"  /* sum += (len<<16) + proto */
+		"	brc	12,2f\n"
+		"	aghi	%0,1\n"   /* add carry */
+		"2:	srlg	0,%0,32\n"
+		"	alr	%0,0\n"   /* fold to 32 bits */
+		"	brc	12,3f\n"
+		"	ahi	%0,1\n"   /* add carry */
+		"3:	llgfr	%0,%0"
 		: "+&d" (sum)
 		: "d" (saddr), "d" (daddr),
 		  "d" (((unsigned int) len<<16) + (unsigned int) proto)
-		: "cc", "0" );
+		: "cc", "0");
 #endif /* __s390x__ */
 	return sum;
 }
diff --git a/include/asm-s390/ebcdic.h b/include/asm-s390/ebcdic.h
index 15fd2ed..7f6f641 100644
--- a/include/asm-s390/ebcdic.h
+++ b/include/asm-s390/ebcdic.h
@@ -26,16 +26,16 @@
 {
 	if (nr-- <= 0)
 		return;
-        __asm__ __volatile__(
-		"   bras 1,1f\n"
-		"   tr   0(1,%0),0(%2)\n"
-                "0: tr   0(256,%0),0(%2)\n"
-		"   la   %0,256(%0)\n"
-		"1: ahi  %1,-256\n"
-		"   jnm  0b\n"
-		"   ex   %1,0(1)"
-                : "+&a" (addr), "+&a" (nr)
-                : "a" (codepage) : "cc", "memory", "1" );
+	asm volatile(
+		"	bras	1,1f\n"
+		"	tr	0(1,%0),0(%2)\n"
+		"0:	tr	0(256,%0),0(%2)\n"
+		"	la	%0,256(%0)\n"
+		"1:	ahi	%1,-256\n"
+		"	jnm	0b\n"
+		"	ex	%1,0(1)"
+		: "+&a" (addr), "+&a" (nr)
+		: "a" (codepage) : "cc", "memory", "1");
 }
 
 #define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
diff --git a/include/asm-s390/io.h b/include/asm-s390/io.h
index a6cc27e..63c78b9 100644
--- a/include/asm-s390/io.h
+++ b/include/asm-s390/io.h
@@ -27,18 +27,16 @@
 static inline unsigned long virt_to_phys(volatile void * address)
 {
 	unsigned long real_address;
-	__asm__ (
+	asm volatile(
 #ifndef __s390x__
-		 "   lra    %0,0(%1)\n"
-                 "   jz     0f\n"
-                 "   sr     %0,%0\n"
+		 "	lra	%0,0(%1)\n"
 #else /* __s390x__ */
-		 "   lrag   %0,0(%1)\n"
-                 "   jz     0f\n"
-                 "   slgr   %0,%0\n"
+		 "	lrag	%0,0(%1)\n"
 #endif /* __s390x__ */
+		 "	jz	0f\n"
+		 "	la	%0,0\n"
                  "0:"
-                 : "=a" (real_address) : "a" (address) : "cc" );
+		 : "=a" (real_address) : "a" (address) : "cc");
         return real_address;
 }
 
diff --git a/include/asm-s390/irqflags.h b/include/asm-s390/irqflags.h
index 3b566a5..3f26131 100644
--- a/include/asm-s390/irqflags.h
+++ b/include/asm-s390/irqflags.h
@@ -10,43 +10,93 @@
 
 #ifdef __KERNEL__
 
-/* interrupt control.. */
-#define raw_local_irq_enable() ({ \
-	unsigned long  __dummy; \
-	__asm__ __volatile__ ( \
-		"stosm 0(%1),0x03" \
-		: "=m" (__dummy) : "a" (&__dummy) : "memory" ); \
-	})
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
 
-#define raw_local_irq_disable() ({ \
-	unsigned long __flags; \
-	__asm__ __volatile__ ( \
-		"stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \
-	__flags; \
-	})
-
-#define raw_local_save_flags(x)							\
-do {										\
-	typecheck(unsigned long, x);						\
-	__asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) );	\
-} while (0)
-
-#define raw_local_irq_restore(x)						\
-do {										\
-	typecheck(unsigned long, x);						\
-	__asm__ __volatile__("ssm   0(%0)" : : "a" (&x), "m" (x) : "memory");	\
-} while (0)
-
-#define raw_irqs_disabled()		\
-({					\
-	unsigned long flags;		\
-	raw_local_save_flags(flags);	\
-	!((flags >> __FLAG_SHIFT) & 3);	\
+/* store then or system mask. */
+#define __raw_local_irq_stosm(__or)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stosm	%0,%1"					\
+		: "=Q" (__mask) : "i" (__or) : "memory");		\
+	__mask;								\
 })
 
+/* store then and system mask. */
+#define __raw_local_irq_stnsm(__and)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stnsm	%0,%1"					\
+		: "=Q" (__mask) : "i" (__and) : "memory");		\
+	__mask;								\
+})
+
+/* set system mask; ssm takes it from the storage operand __mask. */
+#define __raw_local_irq_ssm(__mask)					\
+({									\
+	asm volatile("	ssm	%0" : : "Q" (__mask) : "memory");	\
+})
+
+#else /* __GNUC__ */
+
+/* store then or system mask. */
+#define __raw_local_irq_stosm(__or)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stosm	0(%1),%2"				\
+		: "=m" (__mask)						\
+		: "a" (&__mask), "i" (__or) : "memory");		\
+	__mask;								\
+})
+
+/* store then and system mask. */
+#define __raw_local_irq_stnsm(__and)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stnsm	0(%1),%2"				\
+		: "=m" (__mask)						\
+		: "a" (&__mask), "i" (__and) : "memory");		\
+	__mask;								\
+})
+
+/* set system mask. */
+#define __raw_local_irq_ssm(__mask)					\
+({									\
+	asm volatile(							\
+		"	ssm	0(%0)"					\
+		: : "a" (&__mask), "m" (__mask) : "memory");		\
+})
+
+#endif /* __GNUC__ */
+
+/* interrupt control.. */
+static inline unsigned long raw_local_irq_enable(void)
+{
+	return __raw_local_irq_stosm(0x03);	/* OR 0x03 into the system mask; returns what stosm stored */
+}
+
+static inline unsigned long raw_local_irq_disable(void)
+{
+	return __raw_local_irq_stnsm(0xfc);	/* AND the system mask with 0xfc; returns what stnsm stored */
+}
+
+#define raw_local_save_flags(x)						\
+do {									\
+	typecheck(unsigned long, x);					\
+	(x) = __raw_local_irq_stosm(0x00);				\
+} while (0)
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+	__raw_local_irq_ssm(flags);
+}
+
 static inline int raw_irqs_disabled_flags(unsigned long flags)
 {
-	return !((flags >> __FLAG_SHIFT) & 3);
+	return !(flags & (3UL << (BITS_PER_LONG - 8)));	/* tests bits 0x03 of the most significant byte */
 }
 
 /* For spinlocks etc */
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index 18695d1..06583ed 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -359,7 +359,7 @@
 
 static inline void set_prefix(__u32 address)
 {
-        __asm__ __volatile__ ("spx %0" : : "m" (address) : "memory" );
+	asm volatile("spx %0" : : "m" (address) : "memory");
 }
 
 #define __PANIC_MAGIC           0xDEADC0DE
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index b2628dc..796c400 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -22,89 +22,45 @@
 #include <asm/setup.h>
 #ifndef __ASSEMBLY__
 
-#ifndef __s390x__
-
 static inline void clear_page(void *page)
 {
-	register_pair rp;
-
-	rp.subreg.even = (unsigned long) page;
-	rp.subreg.odd = (unsigned long) 4096;
-        asm volatile ("   slr  1,1\n"
-		      "   mvcl %0,0"
-		      : "+&a" (rp) : : "memory", "cc", "1" );
+	register unsigned long reg1 asm ("1") = 0;	/* gpr 1: mvcl source length and pad byte, both 0 */
+	register void *reg2 asm ("2") = page;		/* gpr 2: destination address */
+	register unsigned long reg3 asm ("3") = 4096;	/* gpr 3: destination length */
+	asm volatile(
+		"	mvcl	2,0"	/* source length 0: destination is filled with the pad byte */
+		: "+d" (reg2), "+d" (reg3) : "d" (reg1) : "memory", "cc");	/* "+d": mvcl updates gprs 2 and 3 */
 }
 
 static inline void copy_page(void *to, void *from)
 {
-        if (MACHINE_HAS_MVPG)
-		asm volatile ("   sr   0,0\n"
-			      "   mvpg %0,%1"
-			      : : "a" ((void *)(to)), "a" ((void *)(from))
-			      : "memory", "cc", "0" );
-	else
-		asm volatile ("   mvc  0(256,%0),0(%1)\n"
-			      "   mvc  256(256,%0),256(%1)\n"
-			      "   mvc  512(256,%0),512(%1)\n"
-			      "   mvc  768(256,%0),768(%1)\n"
-			      "   mvc  1024(256,%0),1024(%1)\n"
-			      "   mvc  1280(256,%0),1280(%1)\n"
-			      "   mvc  1536(256,%0),1536(%1)\n"
-			      "   mvc  1792(256,%0),1792(%1)\n"
-			      "   mvc  2048(256,%0),2048(%1)\n"
-			      "   mvc  2304(256,%0),2304(%1)\n"
-			      "   mvc  2560(256,%0),2560(%1)\n"
-			      "   mvc  2816(256,%0),2816(%1)\n"
-			      "   mvc  3072(256,%0),3072(%1)\n"
-			      "   mvc  3328(256,%0),3328(%1)\n"
-			      "   mvc  3584(256,%0),3584(%1)\n"
-			      "   mvc  3840(256,%0),3840(%1)\n"
-			      : : "a"((void *)(to)),"a"((void *)(from)) 
-			      : "memory" );
+	if (MACHINE_HAS_MVPG) {
+		register unsigned long reg0 asm ("0") = 0;	/* gpr 0 must be 0 for mvpg; replaces the old "sr 0,0" */
+		asm volatile(
+			"	mvpg	%0,%1"
+			: : "a" (to), "a" (from), "d" (reg0)	/* reg0 is an input only to keep gpr 0 live and zero */
+			: "memory", "cc");
+	} else
+		asm volatile(
+			"	mvc	0(256,%0),0(%1)\n"
+			"	mvc	256(256,%0),256(%1)\n"
+			"	mvc	512(256,%0),512(%1)\n"
+			"	mvc	768(256,%0),768(%1)\n"
+			"	mvc	1024(256,%0),1024(%1)\n"
+			"	mvc	1280(256,%0),1280(%1)\n"
+			"	mvc	1536(256,%0),1536(%1)\n"
+			"	mvc	1792(256,%0),1792(%1)\n"
+			"	mvc	2048(256,%0),2048(%1)\n"
+			"	mvc	2304(256,%0),2304(%1)\n"
+			"	mvc	2560(256,%0),2560(%1)\n"
+			"	mvc	2816(256,%0),2816(%1)\n"
+			"	mvc	3072(256,%0),3072(%1)\n"
+			"	mvc	3328(256,%0),3328(%1)\n"
+			"	mvc	3584(256,%0),3584(%1)\n"
+			"	mvc	3840(256,%0),3840(%1)\n"
+			: : "a" (to), "a" (from) : "memory");
 }
 
-#else /* __s390x__ */
-
-static inline void clear_page(void *page)
-{
-        asm volatile ("   lgr  2,%0\n"
-                      "   lghi 3,4096\n"
-                      "   slgr 1,1\n"
-                      "   mvcl 2,0"
-                      : : "a" ((void *) (page))
-		      : "memory", "cc", "1", "2", "3" );
-}
-
-static inline void copy_page(void *to, void *from)
-{
-        if (MACHINE_HAS_MVPG)
-		asm volatile ("   sgr  0,0\n"
-			      "   mvpg %0,%1"
-			      : : "a" ((void *)(to)), "a" ((void *)(from))
-			      : "memory", "cc", "0" );
-	else
-		asm volatile ("   mvc  0(256,%0),0(%1)\n"
-			      "   mvc  256(256,%0),256(%1)\n"
-			      "   mvc  512(256,%0),512(%1)\n"
-			      "   mvc  768(256,%0),768(%1)\n"
-			      "   mvc  1024(256,%0),1024(%1)\n"
-			      "   mvc  1280(256,%0),1280(%1)\n"
-			      "   mvc  1536(256,%0),1536(%1)\n"
-			      "   mvc  1792(256,%0),1792(%1)\n"
-			      "   mvc  2048(256,%0),2048(%1)\n"
-			      "   mvc  2304(256,%0),2304(%1)\n"
-			      "   mvc  2560(256,%0),2560(%1)\n"
-			      "   mvc  2816(256,%0),2816(%1)\n"
-			      "   mvc  3072(256,%0),3072(%1)\n"
-			      "   mvc  3328(256,%0),3328(%1)\n"
-			      "   mvc  3584(256,%0),3584(%1)\n"
-			      "   mvc  3840(256,%0),3840(%1)\n"
-			      : : "a"((void *)(to)),"a"((void *)(from)) 
-			      : "memory" );
-}
-
-#endif /* __s390x__ */
-
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
@@ -159,7 +115,7 @@
 static inline void
 page_set_storage_key(unsigned long addr, unsigned int skey)
 {
-	asm volatile ( "sske %0,%1" : : "d" (skey), "a" (addr) );
+	asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
 }
 
 static inline unsigned int
@@ -167,8 +123,7 @@
 {
 	unsigned int skey;
 
-	asm volatile ( "iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0) );
-
+	asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0));
 	return skey;
 }
 
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index e965309..83425cd 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -554,9 +554,10 @@
 		/* ipte in zarch mode can do the math */
 		pte_t *pto = ptep;
 #endif
-		asm volatile ("ipte %2,%3"
-			      : "=m" (*ptep) : "m" (*ptep),
-				"a" (pto), "a" (address) );
+		asm volatile(
+			"	ipte	%2,%3"
+			: "=m" (*ptep) : "m" (*ptep),
+			  "a" (pto), "a" (address));
 	}
 	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
 }
@@ -609,16 +610,17 @@
 /*
  * Test and clear referenced bit in storage key.
  */
-#define page_test_and_clear_young(page)					  \
-({									  \
-	struct page *__page = (page);					  \
-	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);  \
-	int __ccode;							  \
-	asm volatile ("rrbe 0,%1\n\t"					  \
-		      "ipm  %0\n\t"					  \
-		      "srl  %0,28\n\t" 					  \
-                      : "=d" (__ccode) : "a" (__physpage) : "cc" );	  \
-	(__ccode & 2);							  \
+#define page_test_and_clear_young(page)					\
+({									\
+	struct page *__page = (page);					\
+	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);\
+	int __ccode;							\
+	asm volatile(							\
+		"	rrbe	0,%1\n"					\
+		"	ipm	%0\n"					\
+		"	srl	%0,28\n"				\
+		: "=d" (__ccode) : "a" (__physpage) : "cc");		\
+	(__ccode & 2);							\
 })
 
 /*
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 578c220..cbbedc6 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -13,7 +13,6 @@
 #ifndef __ASM_S390_PROCESSOR_H
 #define __ASM_S390_PROCESSOR_H
 
-#include <asm/page.h>
 #include <asm/ptrace.h>
 
 #ifdef __KERNEL__
@@ -21,7 +20,7 @@
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
  */
-#define current_text_addr() ({ void *pc; __asm__("basr %0,0":"=a"(pc)); pc; })
+#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
 
 /*
  *  CPU type and hardware bug flags. Kept separately for each CPU.
@@ -202,7 +201,7 @@
 static inline void cpu_relax(void)
 {
 	if (MACHINE_HAS_DIAG44)
-		asm volatile ("diag 0,0,68" : : : "memory");
+		asm volatile("diag 0,0,68" : : : "memory");
 	else
 		barrier();
 }
@@ -213,9 +212,9 @@
 static inline void __load_psw(psw_t psw)
 {
 #ifndef __s390x__
-	asm volatile ("lpsw  0(%0)" : : "a" (&psw), "m" (psw) : "cc" );
+	asm volatile("lpsw  0(%0)" : : "a" (&psw), "m" (psw) : "cc");
 #else
-	asm volatile ("lpswe 0(%0)" : : "a" (&psw), "m" (psw) : "cc" );
+	asm volatile("lpswe 0(%0)" : : "a" (&psw), "m" (psw) : "cc");
 #endif
 }
 
@@ -232,20 +231,20 @@
 	psw.mask = mask;
 
 #ifndef __s390x__
-	asm volatile (
-		"    basr %0,0\n"
-		"0:  ahi  %0,1f-0b\n"
-		"    st	  %0,4(%1)\n"
-		"    lpsw 0(%1)\n"
+	asm volatile(
+		"	basr	%0,0\n"
+		"0:	ahi	%0,1f-0b\n"
+		"	st	%0,4(%1)\n"
+		"	lpsw	0(%1)\n"
 		"1:"
-		: "=&d" (addr) : "a" (&psw), "m" (psw) : "memory", "cc" );
+		: "=&d" (addr) : "a" (&psw), "m" (psw) : "memory", "cc");
 #else /* __s390x__ */
-	asm volatile (
-		"    larl  %0,1f\n"
-		"    stg   %0,8(%1)\n"
-		"    lpswe 0(%1)\n"
+	asm volatile(
+		"	larl	%0,1f\n"
+		"	stg	%0,8(%1)\n"
+		"	lpswe	0(%1)\n"
 		"1:"
-		: "=&d" (addr) : "a" (&psw), "m" (psw) : "memory", "cc" );
+		: "=&d" (addr) : "a" (&psw), "m" (psw) : "memory", "cc");
 #endif /* __s390x__ */
 }
  
@@ -274,56 +273,57 @@
          * the processor is dead afterwards
          */
 #ifndef __s390x__
-        asm volatile ("    stctl 0,0,0(%2)\n"
-                      "    ni    0(%2),0xef\n" /* switch off protection */
-                      "    lctl  0,0,0(%2)\n"
-                      "    stpt  0xd8\n"       /* store timer */
-                      "    stckc 0xe0\n"       /* store clock comparator */
-                      "    stpx  0x108\n"      /* store prefix register */
-                      "    stam  0,15,0x120\n" /* store access registers */
-                      "    std   0,0x160\n"    /* store f0 */
-                      "    std   2,0x168\n"    /* store f2 */
-                      "    std   4,0x170\n"    /* store f4 */
-                      "    std   6,0x178\n"    /* store f6 */
-                      "    stm   0,15,0x180\n" /* store general registers */
-                      "    stctl 0,15,0x1c0\n" /* store control registers */
-                      "    oi    0x1c0,0x10\n" /* fake protection bit */
-                      "    lpsw 0(%1)"
-                      : "=m" (ctl_buf)
-		      : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc" );
+	asm volatile(
+		"	stctl	0,0,0(%2)\n"
+		"	ni	0(%2),0xef\n"	/* switch off protection */
+		"	lctl	0,0,0(%2)\n"
+		"	stpt	0xd8\n"		/* store timer */
+		"	stckc	0xe0\n"		/* store clock comparator */
+		"	stpx	0x108\n"	/* store prefix register */
+		"	stam	0,15,0x120\n"	/* store access registers */
+		"	std	0,0x160\n"	/* store f0 */
+		"	std	2,0x168\n"	/* store f2 */
+		"	std	4,0x170\n"	/* store f4 */
+		"	std	6,0x178\n"	/* store f6 */
+		"	stm	0,15,0x180\n"	/* store general registers */
+		"	stctl	0,15,0x1c0\n"	/* store control registers */
+		"	oi	0x1c0,0x10\n"	/* fake protection bit */
+		"	lpsw	0(%1)"
+		: "=m" (ctl_buf)
+		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc");
 #else /* __s390x__ */
-        asm volatile ("    stctg 0,0,0(%2)\n"
-                      "    ni    4(%2),0xef\n" /* switch off protection */
-                      "    lctlg 0,0,0(%2)\n"
-                      "    lghi  1,0x1000\n"
-                      "    stpt  0x328(1)\n"      /* store timer */
-                      "    stckc 0x330(1)\n"      /* store clock comparator */
-                      "    stpx  0x318(1)\n"      /* store prefix register */
-                      "    stam  0,15,0x340(1)\n" /* store access registers */
-                      "    stfpc 0x31c(1)\n"      /* store fpu control */
-                      "    std   0,0x200(1)\n"    /* store f0 */
-                      "    std   1,0x208(1)\n"    /* store f1 */
-                      "    std   2,0x210(1)\n"    /* store f2 */
-                      "    std   3,0x218(1)\n"    /* store f3 */
-                      "    std   4,0x220(1)\n"    /* store f4 */
-                      "    std   5,0x228(1)\n"    /* store f5 */
-                      "    std   6,0x230(1)\n"    /* store f6 */
-                      "    std   7,0x238(1)\n"    /* store f7 */
-                      "    std   8,0x240(1)\n"    /* store f8 */
-                      "    std   9,0x248(1)\n"    /* store f9 */
-                      "    std   10,0x250(1)\n"   /* store f10 */
-                      "    std   11,0x258(1)\n"   /* store f11 */
-                      "    std   12,0x260(1)\n"   /* store f12 */
-                      "    std   13,0x268(1)\n"   /* store f13 */
-                      "    std   14,0x270(1)\n"   /* store f14 */
-                      "    std   15,0x278(1)\n"   /* store f15 */
-                      "    stmg  0,15,0x280(1)\n" /* store general registers */
-                      "    stctg 0,15,0x380(1)\n" /* store control registers */
-                      "    oi    0x384(1),0x10\n" /* fake protection bit */
-                      "    lpswe 0(%1)"
-                      : "=m" (ctl_buf)
-		      : "a" (&dw_psw), "a" (&ctl_buf),
-		        "m" (dw_psw) : "cc", "0", "1");
+	asm volatile(
+		"	stctg	0,0,0(%2)\n"
+		"	ni	4(%2),0xef\n"	/* switch off protection */
+		"	lctlg	0,0,0(%2)\n"
+		"	lghi	1,0x1000\n"	/* gpr 1 = base for the stores below */
+		"	stpt	0x328(1)\n"	/* store timer */
+		"	stckc	0x330(1)\n"	/* store clock comparator */
+		"	stpx	0x318(1)\n"	/* store prefix register */
+		"	stam	0,15,0x340(1)\n"/* store access registers */
+		"	stfpc	0x31c(1)\n"	/* store fpu control */
+		"	std	0,0x200(1)\n"	/* store f0 */
+		"	std	1,0x208(1)\n"	/* store f1 */
+		"	std	2,0x210(1)\n"	/* store f2 */
+		"	std	3,0x218(1)\n"	/* store f3 */
+		"	std	4,0x220(1)\n"	/* store f4 */
+		"	std	5,0x228(1)\n"	/* store f5 */
+		"	std	6,0x230(1)\n"	/* store f6 */
+		"	std	7,0x238(1)\n"	/* store f7 */
+		"	std	8,0x240(1)\n"	/* store f8 */
+		"	std	9,0x248(1)\n"	/* store f9 */
+		"	std	10,0x250(1)\n"	/* store f10 */
+		"	std	11,0x258(1)\n"	/* store f11 */
+		"	std	12,0x260(1)\n"	/* store f12 */
+		"	std	13,0x268(1)\n"	/* store f13 */
+		"	std	14,0x270(1)\n"	/* store f14 */
+		"	std	15,0x278(1)\n"	/* store f15 */
+		"	stmg	0,15,0x280(1)\n"/* store general registers */
+		"	stctg	0,15,0x380(1)\n"/* store control registers */
+		"	oi	0x384(1),0x10\n"/* fake protection bit */
+		"	lpswe	0(%1)"
+		: "=m" (ctl_buf) : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw)
+		: "cc", "0", "1");	/* "1": lghi above overwrites gpr 1, so %1 must not live there */
 #endif /* __s390x__ */
 }
 
diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h
index 4d75d77..8d2bf65 100644
--- a/include/asm-s390/ptrace.h
+++ b/include/asm-s390/ptrace.h
@@ -479,7 +479,7 @@
 static inline void
 psw_set_key(unsigned int key)
 {
-	asm volatile ( "spka 0(%0)" : : "d" (key) );
+	asm volatile("spka 0(%0)" : : "d" (key));
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h
index 13ec169..90f4ecc 100644
--- a/include/asm-s390/rwsem.h
+++ b/include/asm-s390/rwsem.h
@@ -122,23 +122,23 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   ahi  %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	ahi	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   aghi %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	aghi	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count),
-		  "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory" );
+		  "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory");
 	if (old < 0)
 		rwsem_down_read_failed(sem);
 }
@@ -150,27 +150,27 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: ltr  %1,%0\n"
-		"   jm   1f\n"
-		"   ahi  %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b\n"
+		"	l	%0,0(%3)\n"
+		"0:	ltr	%1,%0\n"
+		"	jm	1f\n"
+		"	ahi	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b\n"
 		"1:"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: ltgr %1,%0\n"
-		"   jm   1f\n"
-		"   aghi %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b\n"
+		"	lg	%0,0(%3)\n"
+		"0:	ltgr	%1,%0\n"
+		"	jm	1f\n"
+		"	aghi	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b\n"
 		"1:"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count),
-		  "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory" );
+		  "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory");
 	return old >= 0 ? 1 : 0;
 }
 
@@ -182,23 +182,23 @@
 	signed long old, new, tmp;
 
 	tmp = RWSEM_ACTIVE_WRITE_BIAS;
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   a    %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	a	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   ag   %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "m" (tmp)
-		: "cc", "memory" );
+		: "cc", "memory");
 	if (old != 0)
 		rwsem_down_write_failed(sem);
 }
@@ -215,24 +215,24 @@
 {
 	signed long old;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%2)\n"
-		"0: ltr  %0,%0\n"
-		"   jnz  1f\n"
-		"   cs   %0,%4,0(%2)\n"
-		"   jl   0b\n"
+		"	l	%0,0(%2)\n"
+		"0:	ltr	%0,%0\n"
+		"	jnz	1f\n"
+		"	cs	%0,%4,0(%2)\n"
+		"	jl	0b\n"
 #else /* __s390x__ */
-		"   lg   %0,0(%2)\n"
-		"0: ltgr %0,%0\n"
-		"   jnz  1f\n"
-		"   csg  %0,%4,0(%2)\n"
-		"   jl   0b\n"
+		"	lg	%0,0(%2)\n"
+		"0:	ltgr	%0,%0\n"
+		"	jnz	1f\n"
+		"	csg	%0,%4,0(%2)\n"
+		"	jl	0b\n"
 #endif /* __s390x__ */
 		"1:"
-                : "=&d" (old), "=m" (sem->count)
+		: "=&d" (old), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count),
-		  "d" (RWSEM_ACTIVE_WRITE_BIAS) : "cc", "memory" );
+		  "d" (RWSEM_ACTIVE_WRITE_BIAS) : "cc", "memory");
 	return (old == RWSEM_UNLOCKED_VALUE) ? 1 : 0;
 }
 
@@ -243,24 +243,24 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   ahi  %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	ahi	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   aghi %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	aghi	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count),
 		  "i" (-RWSEM_ACTIVE_READ_BIAS)
-		: "cc", "memory" );
+		: "cc", "memory");
 	if (new < 0)
 		if ((new & RWSEM_ACTIVE_MASK) == 0)
 			rwsem_wake(sem);
@@ -274,23 +274,23 @@
 	signed long old, new, tmp;
 
 	tmp = -RWSEM_ACTIVE_WRITE_BIAS;
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   a    %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	a	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   ag   %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "m" (tmp)
-		: "cc", "memory" );
+		: "cc", "memory");
 	if (new < 0)
 		if ((new & RWSEM_ACTIVE_MASK) == 0)
 			rwsem_wake(sem);
@@ -304,23 +304,23 @@
 	signed long old, new, tmp;
 
 	tmp = -RWSEM_WAITING_BIAS;
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   a    %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	a	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   ag   %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "m" (tmp)
-		: "cc", "memory" );
+		: "cc", "memory");
 	if (new > 1)
 		rwsem_downgrade_wake(sem);
 }
@@ -332,23 +332,23 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   ar   %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	ar	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   agr  %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	agr	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "d" (delta)
-		: "cc", "memory" );
+		: "cc", "memory");
 }
 
 /*
@@ -358,23 +358,23 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   ar   %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	ar	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   agr  %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	agr	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "d" (delta)
-		: "cc", "memory" );
+		: "cc", "memory");
 	return new;
 }
 
diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h
index 32cdc69..dbce058 100644
--- a/include/asm-s390/semaphore.h
+++ b/include/asm-s390/semaphore.h
@@ -85,17 +85,17 @@
 	 *       sem->count.counter = --new_val;
 	 * In the ppc code this is called atomic_dec_if_positive.
 	 */
-	__asm__ __volatile__ (
-		"   l    %0,0(%3)\n"
-		"0: ltr  %1,%0\n"
-		"   jle  1f\n"
-		"   ahi  %1,-1\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b\n"
+	asm volatile(
+		"	l	%0,0(%3)\n"
+		"0:	ltr	%1,%0\n"
+		"	jle	1f\n"
+		"	ahi	%1,-1\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b\n"
 		"1:"
 		: "=&d" (old_val), "=&d" (new_val), "=m" (sem->count.counter)
 		: "a" (&sem->count.counter), "m" (sem->count.counter)
-		: "cc", "memory" );
+		: "cc", "memory");
 	return old_val <= 0;
 }
 
diff --git a/include/asm-s390/sfp-machine.h b/include/asm-s390/sfp-machine.h
index de69dfa..8ca8c77 100644
--- a/include/asm-s390/sfp-machine.h
+++ b/include/asm-s390/sfp-machine.h
@@ -76,21 +76,23 @@
 	unsigned int __r2 = (x2) + (y2);			\
 	unsigned int __r1 = (x1);				\
 	unsigned int __r0 = (x0);				\
-	__asm__ ("   alr %2,%3\n"				\
-		 "   brc 12,0f\n"				\
-		 "   lhi 0,1\n"					\
-		 "   alr %1,0\n"				\
-		 "   brc 12,0f\n"				\
-		 "   alr %0,0\n"				\
-		 "0:"						\
-		 : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
-		 : "d" (y0), "i" (1) : "cc", "0" );		\
-	__asm__ ("   alr %1,%2\n"				\
-		 "   brc 12,0f\n"				\
-		 "   ahi %0,1\n"				\
-		 "0:"						\
-		 : "+&d" (__r2), "+&d" (__r1)			\
-		 : "d" (y1) : "cc" );				\
+	asm volatile(						\
+		"	alr	%2,%3\n"			\
+		"	brc	12,0f\n"			\
+		"	lhi	0,1\n"				\
+		"	alr	%1,0\n"				\
+		"	brc	12,0f\n"			\
+		"	alr	%0,0\n"				\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
+		: "d" (y0), "i" (1) : "cc", "0" );		\
+	asm volatile(						\
+		"	alr	%1,%2\n"			\
+		"	brc	12,0f\n"			\
+		"	ahi	%0,1\n"				\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1)			\
+		: "d" (y1) : "cc");				\
 	(r2) = __r2;						\
 	(r1) = __r1;						\
 	(r0) = __r0;						\
@@ -100,21 +102,23 @@
 	unsigned int __r2 = (x2) - (y2);			\
 	unsigned int __r1 = (x1);				\
 	unsigned int __r0 = (x0);				\
-	__asm__ ("   slr %2,%3\n"				\
-		 "   brc 3,0f\n"				\
-		 "   lhi 0,1\n"					\
-		 "   slr %1,0\n"				\
-		 "   brc 3,0f\n"				\
-		 "   slr %0,0\n"				\
-		 "0:"						\
-		 : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
-		 : "d" (y0) : "cc", "0" );			\
-	__asm__ ("   slr %1,%2\n"				\
-		 "   brc 3,0f\n"				\
-		 "   ahi %0,-1\n"				\
-		 "0:"						\
-		 : "+&d" (__r2), "+&d" (__r1)			\
-		 : "d" (y1) : "cc" );				\
+	asm volatile(						\
+		"	slr   %2,%3\n"				\
+		"	brc	3,0f\n"				\
+		"	lhi	0,1\n"				\
+		"	slr	%1,0\n"				\
+		"	brc	3,0f\n"				\
+		"	slr	%0,0\n"				\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
+		: "d" (y0) : "cc", "0");			\
+	asm volatile(						\
+		"	slr	%1,%2\n"			\
+		"	brc	3,0f\n"				\
+		"	ahi	%0,-1\n"			\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1)			\
+		: "d" (y1) : "cc");				\
 	(r2) = __r2;						\
 	(r1) = __r1;						\
 	(r0) = __r0;						\
diff --git a/include/asm-s390/sigp.h b/include/asm-s390/sigp.h
index fc56458..e16d56f 100644
--- a/include/asm-s390/sigp.h
+++ b/include/asm-s390/sigp.h
@@ -70,16 +70,16 @@
 static inline sigp_ccode
 signal_processor(__u16 cpu_addr, sigp_order_code order_code)
 {
+	register unsigned long reg1 asm ("1") = 0;	/* parameter=0 */
 	sigp_ccode ccode;
 
-	__asm__ __volatile__(
-		"    sr     1,1\n"        /* parameter=0 in gpr 1 */
-		"    sigp   1,%1,0(%2)\n"
-		"    ipm    %0\n"
-		"    srl    %0,28\n"
-		: "=d" (ccode)
-		: "d" (__cpu_logical_map[cpu_addr]), "a" (order_code)
-		: "cc" , "memory", "1" );
+	asm volatile(
+		"	sigp	%1,%2,0(%3)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (ccode), "+d" (reg1)	/* sigp may store status */
+		: "d" (__cpu_logical_map[cpu_addr]), "a" (order_code)
+		: "cc" , "memory");
 	return ccode;
 }
 
@@ -87,20 +87,18 @@
  * Signal processor with parameter
  */
 static inline sigp_ccode
-signal_processor_p(__u32 parameter, __u16 cpu_addr,
-		   sigp_order_code order_code)
+signal_processor_p(__u32 parameter, __u16 cpu_addr, sigp_order_code order_code)
 {
+	register unsigned int reg1 asm ("1") = parameter;
 	sigp_ccode ccode;
-	
-	__asm__ __volatile__(
-		"    lr     1,%1\n"       /* parameter in gpr 1 */
-		"    sigp   1,%2,0(%3)\n"
-		"    ipm    %0\n"
-		"    srl    %0,28\n"
+
+	asm volatile(
+		"	sigp	%1,%2,0(%3)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
-		: "=d" (ccode)
-		: "d" (parameter), "d" (__cpu_logical_map[cpu_addr]),
-                  "a" (order_code)
-		: "cc" , "memory", "1" );
+		: "=d" (ccode), "+d" (reg1)	/* sigp may store status */
+		: "d" (__cpu_logical_map[cpu_addr]), "a" (order_code)
+		: "cc" , "memory");
 	return ccode;
 }
 
@@ -108,24 +106,21 @@
  * Signal processor with parameter and return status
  */
 static inline sigp_ccode
-signal_processor_ps(__u32 *statusptr, __u32 parameter,
-		    __u16 cpu_addr, sigp_order_code order_code)
+signal_processor_ps(__u32 *statusptr, __u32 parameter, __u16 cpu_addr,
+		    sigp_order_code order_code)
 {
+	register unsigned int reg1 asm ("1") = parameter;
 	sigp_ccode ccode;
-	
-	__asm__ __volatile__(
-		"    sr     2,2\n"        /* clear status */
-		"    lr     3,%2\n"       /* parameter in gpr 3 */
-		"    sigp   2,%3,0(%4)\n"
-		"    st     2,%1\n"
-		"    ipm    %0\n"
-		"    srl    %0,28\n"
-		: "=d" (ccode), "=m" (*statusptr)
-		: "d" (parameter), "d" (__cpu_logical_map[cpu_addr]),
-                  "a" (order_code)
-		: "cc" , "memory", "2" , "3"
-		);
-   return ccode;
+
+	asm volatile(
+		"	sigp	%1,%2,0(%3)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (ccode), "+d" (reg1)
+		: "d" (__cpu_logical_map[cpu_addr]), "a" (order_code)
+		: "cc" , "memory");
+	*statusptr = reg1;
+	return ccode;
 }
 
 #endif /* __SIGP__ */
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 9fb02e9..c3cf030 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -56,7 +56,7 @@
 {
         __u16 cpu_address;
  
-        __asm__ ("stap %0\n" : "=m" (cpu_address));
+	asm volatile("stap %0" : "=m" (cpu_address));
         return cpu_address;
 }
 
diff --git a/include/asm-s390/spinlock.h b/include/asm-s390/spinlock.h
index 273dbec..ce3edf6d6 100644
--- a/include/asm-s390/spinlock.h
+++ b/include/asm-s390/spinlock.h
@@ -11,17 +11,36 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H
 
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
 static inline int
 _raw_compare_and_swap(volatile unsigned int *lock,
 		      unsigned int old, unsigned int new)
 {
-	asm volatile ("cs %0,%3,0(%4)"
-		      : "=d" (old), "=m" (*lock)
-		      : "0" (old), "d" (new), "a" (lock), "m" (*lock)
-		      : "cc", "memory" );
+	asm volatile(
+		"	cs	%0,%3,%1"
+		: "=d" (old), "=Q" (*lock)
+		: "0" (old), "d" (new), "Q" (*lock)
+		: "cc", "memory" );
 	return old;
 }
 
+#else /* __GNUC__ */
+
+static inline int
+_raw_compare_and_swap(volatile unsigned int *lock,
+		      unsigned int old, unsigned int new)
+{
+	asm volatile(
+		"	cs	%0,%3,0(%4)"
+		: "=d" (old), "=m" (*lock)
+		: "0" (old), "d" (new), "a" (lock), "m" (*lock)
+		: "cc", "memory" );
+	return old;
+}
+
+#endif /* __GNUC__ */
+
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
  * on the local processor, one does not.
diff --git a/include/asm-s390/string.h b/include/asm-s390/string.h
index 23a4c39..d074673 100644
--- a/include/asm-s390/string.h
+++ b/include/asm-s390/string.h
@@ -60,12 +60,13 @@
 	register int r0 asm("0") = (char) c;
 	const void *ret = s + n;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b\n"
-		      "   jl	1f\n"
-		      "   la    %0,0\n"
-		      "1:"
-		      : "+a" (ret), "+&a" (s) : "d" (r0) : "cc" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		"	jl	1f\n"
+		"	la	%0,0\n"
+		"1:"
+		: "+a" (ret), "+&a" (s) : "d" (r0) : "cc");
 	return (void *) ret;
 }
 
@@ -74,9 +75,10 @@
 	register int r0 asm("0") = (char) c;
 	const void *ret = s + n;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b\n"
-		      : "+a" (ret), "+&a" (s) : "d" (r0) : "cc" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		: "+a" (ret), "+&a" (s) : "d" (r0) : "cc");
 	return (void *) ret;
 }
 
@@ -86,12 +88,13 @@
 	unsigned long dummy;
 	char *ret = dst;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b\n"
-		      "1: mvst  %0,%2\n"
-		      "   jo    1b"
-		      : "=&a" (dummy), "+a" (dst), "+a" (src)
-		      : "d" (r0), "0" (0) : "cc", "memory" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		"1:	mvst	%0,%2\n"
+		"	jo	1b"
+		: "=&a" (dummy), "+a" (dst), "+a" (src)
+		: "d" (r0), "0" (0) : "cc", "memory" );
 	return ret;
 }
 
@@ -100,10 +103,11 @@
 	register int r0 asm("0") = 0;
 	char *ret = dst;
 
-	asm volatile ("0: mvst  %0,%1\n"
-		      "   jo    0b"
-		      : "+&a" (dst), "+&a" (src) : "d" (r0)
-		      : "cc", "memory" );
+	asm volatile(
+		"0:	mvst	%0,%1\n"
+		"	jo	0b"
+		: "+&a" (dst), "+&a" (src) : "d" (r0)
+		: "cc", "memory");
 	return ret;
 }
 
@@ -112,9 +116,10 @@
 	register unsigned long r0 asm("0") = 0;
 	const char *tmp = s;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b"
-		      : "+d" (r0), "+a" (tmp) :  : "cc" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b"
+		: "+d" (r0), "+a" (tmp) :  : "cc");
 	return r0 - (unsigned long) s;
 }
 
@@ -124,9 +129,10 @@
 	const char *tmp = s;
 	const char *end = s + n;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b"
-		      : "+a" (end), "+a" (tmp) : "d" (r0)  : "cc" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b"
+		: "+a" (end), "+a" (tmp) : "d" (r0)  : "cc");
 	return end - s;
 }
 
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index 1604004..ccbafe4 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -23,74 +23,68 @@
 
 extern struct task_struct *__switch_to(void *, void *);
 
-#ifdef __s390x__
-#define __FLAG_SHIFT 56
-#else /* ! __s390x__ */
-#define __FLAG_SHIFT 24
-#endif /* ! __s390x__ */
-
 static inline void save_fp_regs(s390_fp_regs *fpregs)
 {
-	asm volatile (
-		"   std   0,8(%1)\n"
-		"   std   2,24(%1)\n"
-		"   std   4,40(%1)\n"
-		"   std   6,56(%1)"
-		: "=m" (*fpregs) : "a" (fpregs), "m" (*fpregs) : "memory" );
+	asm volatile(
+		"	std	0,8(%1)\n"
+		"	std	2,24(%1)\n"
+		"	std	4,40(%1)\n"
+		"	std	6,56(%1)"
+		: "=m" (*fpregs) : "a" (fpregs), "m" (*fpregs) : "memory");
 	if (!MACHINE_HAS_IEEE)
 		return;
 	asm volatile(
-		"   stfpc 0(%1)\n"
-		"   std   1,16(%1)\n"
-		"   std   3,32(%1)\n"
-		"   std   5,48(%1)\n"
-		"   std   7,64(%1)\n"
-		"   std   8,72(%1)\n"
-		"   std   9,80(%1)\n"
-		"   std   10,88(%1)\n"
-		"   std   11,96(%1)\n"
-		"   std   12,104(%1)\n"
-		"   std   13,112(%1)\n"
-		"   std   14,120(%1)\n"
-		"   std   15,128(%1)\n"
-		: "=m" (*fpregs) : "a" (fpregs), "m" (*fpregs) : "memory" );
+		"	stfpc	0(%1)\n"
+		"	std	1,16(%1)\n"
+		"	std	3,32(%1)\n"
+		"	std	5,48(%1)\n"
+		"	std	7,64(%1)\n"
+		"	std	8,72(%1)\n"
+		"	std	9,80(%1)\n"
+		"	std	10,88(%1)\n"
+		"	std	11,96(%1)\n"
+		"	std	12,104(%1)\n"
+		"	std	13,112(%1)\n"
+		"	std	14,120(%1)\n"
+		"	std	15,128(%1)\n"
+		: "=m" (*fpregs) : "a" (fpregs), "m" (*fpregs) : "memory");
 }
 
 static inline void restore_fp_regs(s390_fp_regs *fpregs)
 {
-	asm volatile (
-		"   ld    0,8(%0)\n"
-		"   ld    2,24(%0)\n"
-		"   ld    4,40(%0)\n"
-		"   ld    6,56(%0)"
-		: : "a" (fpregs), "m" (*fpregs) );
+	asm volatile(
+		"	ld	0,8(%0)\n"
+		"	ld	2,24(%0)\n"
+		"	ld	4,40(%0)\n"
+		"	ld	6,56(%0)"
+		: : "a" (fpregs), "m" (*fpregs));
 	if (!MACHINE_HAS_IEEE)
 		return;
 	asm volatile(
-		"   lfpc  0(%0)\n"
-		"   ld    1,16(%0)\n"
-		"   ld    3,32(%0)\n"
-		"   ld    5,48(%0)\n"
-		"   ld    7,64(%0)\n"
-		"   ld    8,72(%0)\n"
-		"   ld    9,80(%0)\n"
-		"   ld    10,88(%0)\n"
-		"   ld    11,96(%0)\n"
-		"   ld    12,104(%0)\n"
-		"   ld    13,112(%0)\n"
-		"   ld    14,120(%0)\n"
-		"   ld    15,128(%0)\n"
-		: : "a" (fpregs), "m" (*fpregs) );
+		"	lfpc	0(%0)\n"
+		"	ld	1,16(%0)\n"
+		"	ld	3,32(%0)\n"
+		"	ld	5,48(%0)\n"
+		"	ld	7,64(%0)\n"
+		"	ld	8,72(%0)\n"
+		"	ld	9,80(%0)\n"
+		"	ld	10,88(%0)\n"
+		"	ld	11,96(%0)\n"
+		"	ld	12,104(%0)\n"
+		"	ld	13,112(%0)\n"
+		"	ld	14,120(%0)\n"
+		"	ld	15,128(%0)\n"
+		: : "a" (fpregs), "m" (*fpregs));
 }
 
 static inline void save_access_regs(unsigned int *acrs)
 {
-	asm volatile ("stam 0,15,0(%0)" : : "a" (acrs) : "memory" );
+	asm volatile("stam 0,15,0(%0)" : : "a" (acrs) : "memory");
 }
 
 static inline void restore_access_regs(unsigned int *acrs)
 {
-	asm volatile ("lam 0,15,0(%0)" : : "a" (acrs) );
+	asm volatile("lam 0,15,0(%0)" : : "a" (acrs));
 }
 
 #define switch_to(prev,next,last) do {					     \
@@ -126,7 +120,7 @@
 	account_vtime(prev);						     \
 } while (0)
 
-#define nop() __asm__ __volatile__ ("nop")
+#define nop() asm volatile("nop")
 
 #define xchg(ptr,x)							  \
 ({									  \
@@ -147,15 +141,15 @@
 		shift = (3 ^ (addr & 3)) << 3;
 		addr ^= addr & 3;
 		asm volatile(
-			"    l   %0,0(%4)\n"
-			"0:  lr  0,%0\n"
-			"    nr  0,%3\n"
-			"    or  0,%2\n"
-			"    cs  %0,0,0(%4)\n"
-			"    jl  0b\n"
+			"	l	%0,0(%4)\n"
+			"0:	lr	0,%0\n"
+			"	nr	0,%3\n"
+			"	or	0,%2\n"
+			"	cs	%0,0,0(%4)\n"
+			"	jl	0b\n"
 			: "=&d" (old), "=m" (*(int *) addr)
 			: "d" (x << shift), "d" (~(255 << shift)), "a" (addr),
-			  "m" (*(int *) addr) : "memory", "cc", "0" );
+			  "m" (*(int *) addr) : "memory", "cc", "0");
 		x = old >> shift;
 		break;
 	case 2:
@@ -163,36 +157,36 @@
 		shift = (2 ^ (addr & 2)) << 3;
 		addr ^= addr & 2;
 		asm volatile(
-			"    l   %0,0(%4)\n"
-			"0:  lr  0,%0\n"
-			"    nr  0,%3\n"
-			"    or  0,%2\n"
-			"    cs  %0,0,0(%4)\n"
-			"    jl  0b\n"
+			"	l	%0,0(%4)\n"
+			"0:	lr	0,%0\n"
+			"	nr	0,%3\n"
+			"	or	0,%2\n"
+			"	cs	%0,0,0(%4)\n"
+			"	jl	0b\n"
 			: "=&d" (old), "=m" (*(int *) addr)
 			: "d" (x << shift), "d" (~(65535 << shift)), "a" (addr),
-			  "m" (*(int *) addr) : "memory", "cc", "0" );
+			  "m" (*(int *) addr) : "memory", "cc", "0");
 		x = old >> shift;
 		break;
 	case 4:
-		asm volatile (
-			"    l   %0,0(%3)\n"
-			"0:  cs  %0,%2,0(%3)\n"
-			"    jl  0b\n"
+		asm volatile(
+			"	l	%0,0(%3)\n"
+			"0:	cs	%0,%2,0(%3)\n"
+			"	jl	0b\n"
 			: "=&d" (old), "=m" (*(int *) ptr)
 			: "d" (x), "a" (ptr), "m" (*(int *) ptr)
-			: "memory", "cc" );
+			: "memory", "cc");
 		x = old;
 		break;
 #ifdef __s390x__
 	case 8:
-		asm volatile (
-			"    lg  %0,0(%3)\n"
-			"0:  csg %0,%2,0(%3)\n"
-			"    jl  0b\n"
+		asm volatile(
+			"	lg	%0,0(%3)\n"
+			"0:	csg	%0,%2,0(%3)\n"
+			"	jl	0b\n"
 			: "=&d" (old), "=m" (*(long *) ptr)
 			: "d" (x), "a" (ptr), "m" (*(long *) ptr)
-			: "memory", "cc" );
+			: "memory", "cc");
 		x = old;
 		break;
 #endif /* __s390x__ */
@@ -224,55 +218,55 @@
 		shift = (3 ^ (addr & 3)) << 3;
 		addr ^= addr & 3;
 		asm volatile(
-			"    l   %0,0(%4)\n"
-			"0:  nr  %0,%5\n"
-                        "    lr  %1,%0\n"
-			"    or  %0,%2\n"
-			"    or  %1,%3\n"
-			"    cs  %0,%1,0(%4)\n"
-			"    jnl 1f\n"
-			"    xr  %1,%0\n"
-			"    nr  %1,%5\n"
-			"    jnz 0b\n"
+			"	l	%0,0(%4)\n"
+			"0:	nr	%0,%5\n"
+			"	lr	%1,%0\n"
+			"	or	%0,%2\n"
+			"	or	%1,%3\n"
+			"	cs	%0,%1,0(%4)\n"
+			"	jnl	1f\n"
+			"	xr	%1,%0\n"
+			"	nr	%1,%5\n"
+			"	jnz	0b\n"
 			"1:"
 			: "=&d" (prev), "=&d" (tmp)
 			: "d" (old << shift), "d" (new << shift), "a" (ptr),
 			  "d" (~(255 << shift))
-			: "memory", "cc" );
+			: "memory", "cc");
 		return prev >> shift;
 	case 2:
 		addr = (unsigned long) ptr;
 		shift = (2 ^ (addr & 2)) << 3;
 		addr ^= addr & 2;
 		asm volatile(
-			"    l   %0,0(%4)\n"
-			"0:  nr  %0,%5\n"
-                        "    lr  %1,%0\n"
-			"    or  %0,%2\n"
-			"    or  %1,%3\n"
-			"    cs  %0,%1,0(%4)\n"
-			"    jnl 1f\n"
-			"    xr  %1,%0\n"
-			"    nr  %1,%5\n"
-			"    jnz 0b\n"
+			"	l	%0,0(%4)\n"
+			"0:	nr	%0,%5\n"
+			"	lr	%1,%0\n"
+			"	or	%0,%2\n"
+			"	or	%1,%3\n"
+			"	cs	%0,%1,0(%4)\n"
+			"	jnl	1f\n"
+			"	xr	%1,%0\n"
+			"	nr	%1,%5\n"
+			"	jnz	0b\n"
 			"1:"
 			: "=&d" (prev), "=&d" (tmp)
 			: "d" (old << shift), "d" (new << shift), "a" (ptr),
 			  "d" (~(65535 << shift))
-			: "memory", "cc" );
+			: "memory", "cc");
 		return prev >> shift;
 	case 4:
-		asm volatile (
-			"    cs  %0,%2,0(%3)\n"
+		asm volatile(
+			"	cs	%0,%2,0(%3)\n"
 			: "=&d" (prev) : "0" (old), "d" (new), "a" (ptr)
-			: "memory", "cc" );
+			: "memory", "cc");
 		return prev;
 #ifdef __s390x__
 	case 8:
-		asm volatile (
-			"    csg %0,%2,0(%3)\n"
+		asm volatile(
+			"	csg	%0,%2,0(%3)\n"
 			: "=&d" (prev) : "0" (old), "d" (new), "a" (ptr)
-			: "memory", "cc" );
+			: "memory", "cc");
 		return prev;
 #endif /* __s390x__ */
         }
@@ -289,8 +283,8 @@
  * all memory ops have completed wrt other CPU's ( see 7-15 POP  DJB ).
  */
 
-#define eieio()  __asm__ __volatile__ ( "bcr 15,0" : : : "memory" ) 
-# define SYNC_OTHER_CORES(x)   eieio() 
+#define eieio()	asm volatile("bcr 15,0" : : : "memory")
+#define SYNC_OTHER_CORES(x)   eieio()
 #define mb()    eieio()
 #define rmb()   eieio()
 #define wmb()   eieio()
@@ -307,117 +301,56 @@
 
 #ifdef __s390x__
 
-#define __ctl_load(array, low, high) ({ \
-	typedef struct { char _[sizeof(array)]; } addrtype; \
-	__asm__ __volatile__ ( \
-		"   bras  1,0f\n" \
-                "   lctlg 0,0,0(%0)\n" \
-		"0: ex    %1,0(1)" \
-		: : "a" (&array), "a" (((low)<<4)+(high)), \
-		    "m" (*(addrtype *)(array)) : "1" ); \
+#define __ctl_load(array, low, high) ({				\
+	typedef struct { char _[sizeof(array)]; } addrtype;	\
+	asm volatile(						\
+		"	lctlg	%1,%2,0(%0)\n"			\
+		: : "a" (&array), "i" (low), "i" (high),	\
+		    "m" (*(addrtype *)(&array)));	/* array or scalar */ \
 	})
 
-#define __ctl_store(array, low, high) ({ \
-	typedef struct { char _[sizeof(array)]; } addrtype; \
-	__asm__ __volatile__ ( \
-		"   bras  1,0f\n" \
-		"   stctg 0,0,0(%1)\n" \
-		"0: ex    %2,0(1)" \
-		: "=m" (*(addrtype *)(array)) \
-		: "a" (&array), "a" (((low)<<4)+(high)) : "1" ); \
+#define __ctl_store(array, low, high) ({			\
+	typedef struct { char _[sizeof(array)]; } addrtype;	\
+	asm volatile(						\
+		"	stctg	%2,%3,0(%1)\n"			\
+		: "=m" (*(addrtype *)(&array))	/* array or scalar */	\
+		: "a" (&array), "i" (low), "i" (high));		\
 	})
 
-#define __ctl_set_bit(cr, bit) ({ \
-        __u8 __dummy[24]; \
-        __asm__ __volatile__ ( \
-                "    bras  1,0f\n"       /* skip indirect insns */ \
-                "    stctg 0,0,0(%1)\n" \
-                "    lctlg 0,0,0(%1)\n" \
-                "0:  ex    %2,0(1)\n"    /* execute stctl */ \
-                "    lg    0,0(%1)\n" \
-                "    ogr   0,%3\n"       /* set the bit */ \
-                "    stg   0,0(%1)\n" \
-                "1:  ex    %2,6(1)"      /* execute lctl */ \
-                : "=m" (__dummy) \
-		: "a" ((((unsigned long) &__dummy) + 7) & ~7UL), \
-		  "a" (cr*17), "a" (1L<<(bit)) \
-                : "cc", "0", "1" ); \
-        })
-
-#define __ctl_clear_bit(cr, bit) ({ \
-        __u8 __dummy[16]; \
-        __asm__ __volatile__ ( \
-                "    bras  1,0f\n"       /* skip indirect insns */ \
-                "    stctg 0,0,0(%1)\n" \
-                "    lctlg 0,0,0(%1)\n" \
-                "0:  ex    %2,0(1)\n"    /* execute stctl */ \
-                "    lg    0,0(%1)\n" \
-                "    ngr   0,%3\n"       /* set the bit */ \
-                "    stg   0,0(%1)\n" \
-                "1:  ex    %2,6(1)"      /* execute lctl */ \
-                : "=m" (__dummy) \
-		: "a" ((((unsigned long) &__dummy) + 7) & ~7UL), \
-		  "a" (cr*17), "a" (~(1L<<(bit))) \
-                : "cc", "0", "1" ); \
-        })
-
 #else /* __s390x__ */
 
-#define __ctl_load(array, low, high) ({ \
-	typedef struct { char _[sizeof(array)]; } addrtype; \
-	__asm__ __volatile__ ( \
-		"   bras  1,0f\n" \
-                "   lctl 0,0,0(%0)\n" \
-		"0: ex    %1,0(1)" \
-		: : "a" (&array), "a" (((low)<<4)+(high)), \
-		    "m" (*(addrtype *)(array)) : "1" ); \
+#define __ctl_load(array, low, high) ({				\
+	typedef struct { char _[sizeof(array)]; } addrtype;	\
+	asm volatile(						\
+		"	lctl	%1,%2,0(%0)\n"			\
+		: : "a" (&array), "i" (low), "i" (high),	\
+		    "m" (*(addrtype *)(&array)));	/* array or scalar */ \
+})
+
+#define __ctl_store(array, low, high) ({			\
+	typedef struct { char _[sizeof(array)]; } addrtype;	\
+	asm volatile(						\
+		"	stctl	%2,%3,0(%1)\n"			\
+		: "=m" (*(addrtype *)(&array))	/* array or scalar */	\
+		: "a" (&array), "i" (low), "i" (high));		\
 	})
 
-#define __ctl_store(array, low, high) ({ \
-	typedef struct { char _[sizeof(array)]; } addrtype; \
-	__asm__ __volatile__ ( \
-		"   bras  1,0f\n" \
-		"   stctl 0,0,0(%1)\n" \
-		"0: ex    %2,0(1)" \
-		: "=m" (*(addrtype *)(array)) \
-		: "a" (&array), "a" (((low)<<4)+(high)): "1" ); \
-	})
-
-#define __ctl_set_bit(cr, bit) ({ \
-        __u8 __dummy[16]; \
-        __asm__ __volatile__ ( \
-                "    bras  1,0f\n"       /* skip indirect insns */ \
-                "    stctl 0,0,0(%1)\n" \
-                "    lctl  0,0,0(%1)\n" \
-                "0:  ex    %2,0(1)\n"    /* execute stctl */ \
-                "    l     0,0(%1)\n" \
-                "    or    0,%3\n"       /* set the bit */ \
-                "    st    0,0(%1)\n" \
-                "1:  ex    %2,4(1)"      /* execute lctl */ \
-                : "=m" (__dummy) \
-		: "a" ((((unsigned long) &__dummy) + 7) & ~7UL), \
-		  "a" (cr*17), "a" (1<<(bit)) \
-                : "cc", "0", "1" ); \
-        })
-
-#define __ctl_clear_bit(cr, bit) ({ \
-        __u8 __dummy[16]; \
-        __asm__ __volatile__ ( \
-                "    bras  1,0f\n"       /* skip indirect insns */ \
-                "    stctl 0,0,0(%1)\n" \
-                "    lctl  0,0,0(%1)\n" \
-                "0:  ex    %2,0(1)\n"    /* execute stctl */ \
-                "    l     0,0(%1)\n" \
-                "    nr    0,%3\n"       /* set the bit */ \
-                "    st    0,0(%1)\n" \
-                "1:  ex    %2,4(1)"      /* execute lctl */ \
-                : "=m" (__dummy) \
-		: "a" ((((unsigned long) &__dummy) + 7) & ~7UL), \
-		  "a" (cr*17), "a" (~(1<<(bit))) \
-                : "cc", "0", "1" ); \
-        })
 #endif /* __s390x__ */
 
+#define __ctl_set_bit(cr, bit) ({	\
+	unsigned long __dummy;		\
+	__ctl_store(__dummy, cr, cr);	\
+	__dummy |= 1UL << (bit);	\
+	__ctl_load(__dummy, cr, cr);	\
+})
+
+#define __ctl_clear_bit(cr, bit) ({	\
+	unsigned long __dummy;		\
+	__ctl_store(__dummy, cr, cr);	\
+	__dummy &= ~(1UL << (bit));	\
+	__ctl_load(__dummy, cr, cr);	\
+})
+
 #include <linux/irqflags.h>
 
 /*
@@ -427,8 +360,7 @@
 static inline void
 __set_psw_mask(unsigned long mask)
 {
-	local_save_flags(mask);
-	__load_psw_mask(mask);
+	__load_psw_mask(mask | (__raw_local_irq_stosm(0x00) & ~(-1UL >> 8)));
 }
 
 #define local_mcck_enable()  __set_psw_mask(PSW_KERNEL_BITS)
diff --git a/include/asm-s390/timex.h b/include/asm-s390/timex.h
index 5d0332a..4df4a41 100644
--- a/include/asm-s390/timex.h
+++ b/include/asm-s390/timex.h
@@ -15,20 +15,21 @@
 
 typedef unsigned long long cycles_t;
 
-static inline cycles_t get_cycles(void)
-{
-	cycles_t cycles;
-
-	__asm__ __volatile__ ("stck 0(%1)" : "=m" (cycles) : "a" (&cycles) : "cc");
-	return cycles >> 2;
-}
-
 static inline unsigned long long get_clock (void)
 {
 	unsigned long long clk;
 
-	__asm__ __volatile__ ("stck 0(%1)" : "=m" (clk) : "a" (&clk) : "cc");
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+	asm volatile("stck %0" : "=Q" (clk) : : "cc");
+#else /* __GNUC__ */
+	asm volatile("stck 0(%1)" : "=m" (clk) : "a" (&clk) : "cc");
+#endif /* __GNUC__ */
 	return clk;
 }
 
+static inline cycles_t get_cycles(void)
+{
+	return (cycles_t) get_clock() >> 2;
+}
+
 #endif
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 73cd85b..fa4dc91 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -25,7 +25,7 @@
  */
 
 #define local_flush_tlb() \
-do {  __asm__ __volatile__("ptlb": : :"memory"); } while (0)
+do {  asm volatile("ptlb": : :"memory"); } while (0)
 
 #ifndef CONFIG_SMP
 
@@ -68,24 +68,24 @@
 
 static inline void global_flush_tlb(void)
 {
+	register unsigned long reg2 asm("2");
+	register unsigned long reg3 asm("3");
+	register unsigned long reg4 asm("4");
+	long dummy;
+
 #ifndef __s390x__
 	if (!MACHINE_HAS_CSP) {
 		smp_ptlb_all();
 		return;
 	}
 #endif /* __s390x__ */
-	{
-		register unsigned long addr asm("4");
-		long dummy;
 
-		dummy = 0;
-		addr = ((unsigned long) &dummy) + 1;
-		__asm__ __volatile__ (
-			"    slr  2,2\n"
-			"    slr  3,3\n"
-			"    csp  2,%0"
-			: : "a" (addr), "m" (dummy) : "cc", "2", "3" );
-	}
+	dummy = 0;
+	reg2 = reg3 = 0;
+	reg4 = ((unsigned long) &dummy) + 1;
+	asm volatile(
+		"	csp	%0,%2"
+		: : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
 }
 
 /*
@@ -102,9 +102,9 @@
 	if (unlikely(cpus_empty(mm->cpu_vm_mask)))
 		return;
 	if (MACHINE_HAS_IDTE) {
-		asm volatile (".insn rrf,0xb98e0000,0,%0,%1,0"
-			      : : "a" (2048),
-			      "a" (__pa(mm->pgd)&PAGE_MASK) : "cc" );
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,0,%0,%1,0"
+			: : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc");
 		return;
 	}
 	preempt_disable();
diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h
index e2047b0..72ae4ef 100644
--- a/include/asm-s390/uaccess.h
+++ b/include/asm-s390/uaccess.h
@@ -38,25 +38,14 @@
 #define get_ds()        (KERNEL_DS)
 #define get_fs()        (current->thread.mm_segment)
 
-#ifdef __s390x__
 #define set_fs(x) \
 ({									\
 	unsigned long __pto;						\
 	current->thread.mm_segment = (x);				\
 	__pto = current->thread.mm_segment.ar4 ?			\
 		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
-	asm volatile ("lctlg 7,7,%0" : : "m" (__pto) );			\
+	__ctl_load(__pto, 7, 7);					\
 })
-#else /* __s390x__ */
-#define set_fs(x) \
-({									\
-	unsigned long __pto;						\
-	current->thread.mm_segment = (x);				\
-	__pto = current->thread.mm_segment.ar4 ?			\
-		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
-	asm volatile ("lctl  7,7,%0" : : "m" (__pto) );			\
-})
-#endif /* __s390x__ */
 
 #define segment_eq(a,b) ((a).ar4 == (b).ar4)
 
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index d49c54c..0361ac5 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -355,145 +355,145 @@
 
 #define _svc_clobber "1", "cc", "memory"
 
-#define _syscall0(type,name)				     \
-type name(void) {					     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name)			     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall0(type,name)	/* emit a 0-argument syscall stub */	\
+type name(void) {						\
+	register long __svcres asm("2");	/* result: register 2 */	\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"	/* small nr: svc operand */ \
+		"	svc	%b1\n"				\
+		"	.else\n"	/* else nr in r1, then svc 0 */	\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name)				\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall1(type,name,type1,arg1)			     \
-type name(type1 arg1) {					     \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall1(type,name,type1,arg1)	/* 1-argument syscall stub */	\
+type name(type1 arg1) {						\
+	register type1 __arg1 asm("2") = arg1;	/* argument: r2 */	\
+	register long __svcres asm("2");	/* result shares r2 */	\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"	/* small nr: svc operand */ \
+		"	svc	%b1\n"				\
+		"	.else\n"	/* else nr in r1, then svc 0 */	\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1)	/* tied to output operand 0 */	\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall2(type,name,type1,arg1,type2,arg2)	     \
-type name(type1 arg1, type2 arg2) {			     \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register type2 __arg2 asm("3") = arg2;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1),				     \
-		  "d" (__arg2)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall2(type,name,type1,arg1,type2,arg2)		\
+type name(type1 arg1, type2 arg2) {	/* 2-argument syscall stub */	\
+	register type1 __arg1 asm("2") = arg1;	/* arguments: r2, r3 */	\
+	register type2 __arg2 asm("3") = arg2;			\
+	register long __svcres asm("2");	/* result shares r2 */	\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"	/* small nr: svc operand */ \
+		"	svc	%b1\n"				\
+		"	.else\n"	/* else nr in r1, then svc 0 */	\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1),	/* tied to output operand 0 */	\
+		  "d" (__arg2)					\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)\
-type name(type1 arg1, type2 arg2, type3 arg3) {		     \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register type2 __arg2 asm("3") = arg2;		     \
-	register type3 __arg3 asm("4") = arg3;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1),				     \
-		  "d" (__arg2),				     \
-		  "d" (__arg3)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)	\
+type name(type1 arg1, type2 arg2, type3 arg3) { /* 3-arg stub */	\
+	register type1 __arg1 asm("2") = arg1;	/* arguments: r2..r4 */	\
+	register type2 __arg2 asm("3") = arg2;			\
+	register type3 __arg3 asm("4") = arg3;			\
+	register long __svcres asm("2");	/* result shares r2 */	\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"	/* small nr: svc operand */ \
+		"	svc	%b1\n"				\
+		"	.else\n"	/* else nr in r1, then svc 0 */	\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1),	/* tied to output operand 0 */	\
+		  "d" (__arg2),					\
+		  "d" (__arg3)					\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,\
-		  type4,name4)				     \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {  \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register type2 __arg2 asm("3") = arg2;		     \
-	register type3 __arg3 asm("4") = arg3;		     \
-	register type4 __arg4 asm("5") = arg4;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1),				     \
-		  "d" (__arg2),				     \
-		  "d" (__arg3),				     \
-		  "d" (__arg4)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,	\
+		  type4,arg4)	/* 4-argument syscall stub */	\
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {	\
+	register type1 __arg1 asm("2") = arg1;	/* arguments: r2..r5 */	\
+	register type2 __arg2 asm("3") = arg2;			\
+	register type3 __arg3 asm("4") = arg3;			\
+	register type4 __arg4 asm("5") = arg4;			\
+	register long __svcres asm("2");	/* result shares r2 */	\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"	/* small nr: svc operand */ \
+		"	svc	%b1\n"				\
+		"	.else\n"	/* else nr in r1, then svc 0 */	\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1),	/* tied to output operand 0 */	\
+		  "d" (__arg2),					\
+		  "d" (__arg3),					\
+		  "d" (__arg4)					\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,\
-		  type4,name4,type5,name5)		     \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
-	  type5 arg5) {					     \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register type2 __arg2 asm("3") = arg2;		     \
-	register type3 __arg3 asm("4") = arg3;		     \
-	register type4 __arg4 asm("5") = arg4;		     \
-	register type5 __arg5 asm("6") = arg5;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1),				     \
-		  "d" (__arg2),				     \
-		  "d" (__arg3),				     \
-		  "d" (__arg4),				     \
-		  "d" (__arg5)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,	\
+		  type4,arg4,type5,arg5)	/* 5-argument stub */	\
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4,	\
+	  type5 arg5) {						\
+	register type1 __arg1 asm("2") = arg1;	/* arguments: r2..r6 */	\
+	register type2 __arg2 asm("3") = arg2;			\
+	register type3 __arg3 asm("4") = arg3;			\
+	register type4 __arg4 asm("5") = arg4;			\
+	register type5 __arg5 asm("6") = arg5;			\
+	register long __svcres asm("2");	/* result shares r2 */	\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"	/* small nr: svc operand */ \
+		"	svc	%b1\n"				\
+		"	.else\n"	/* else nr in r1, then svc 0 */	\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1),	/* tied to output operand 0 */	\
+		  "d" (__arg2),					\
+		  "d" (__arg3),					\
+		  "d" (__arg4),					\
+		  "d" (__arg5)					\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
 #define __ARCH_WANT_IPC_PARSE_VERSION