Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (613 commits)
  x86: standalone trampoline code
  x86: move suspend wakeup code to C
  x86: coding style fixes to arch/x86/kernel/acpi/sleep.c
  x86: setup_trampoline() - fix section mismatch warning
  x86: section mismatch fixes, #1
  x86: fix paranoia about using BIOS quickboot mechanism.
  x86: print out buggy mptable
  x86: use cpu_online()
  x86: use cpumask_of_cpu()
  x86: remove unnecessary tmp local variable
  x86: remove unnecessary memset()
  x86: use ioapic_read_entry() and ioapic_write_entry()
  x86: avoid redundant loop in io_apic_level_ack_pending()
  x86: remove superfluous initialisation in boot code.
  x86: merge mpparse_{32,64}.c
  x86: unify mp_register_gsi
  x86: unify mp_config_acpi_legacy_irqs
  x86: unify mp_register_ioapic
  x86: unify uniq_io_apic_id
  x86: unify smp_scan_config
  ...
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
index 0eb7c58..e054209 100644
--- a/Documentation/s390/s390dbf.txt
+++ b/Documentation/s390/s390dbf.txt
@@ -115,6 +115,27 @@
 Description:  Allocates memory for a debug log     
               Must not be called within an interrupt handler 
 
+----------------------------------------------------------------------------
+debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
+				  int buf_size, mode_t mode, uid_t uid,
+				  gid_t gid);
+
+Parameter:    name:	   Name of debug log (e.g. used for debugfs entry)
+	      pages:	   Number of pages, which will be allocated per area
+	      nr_areas:    Number of debug areas
+	      buf_size:    Size of data area in each debug entry
+	      mode:	   File mode for debugfs files. E.g. S_IRWXUGO
+	      uid:	   User ID for debugfs files. Currently only 0 is
+			   supported.
+	      gid:	   Group ID for debugfs files. Currently only 0 is
+			   supported.
+
+Return Value: Handle for generated debug area
+	      NULL if register failed
+
+Description:  Allocates memory for a debug log
+	      Must not be called within an interrupt handler
+
 ---------------------------------------------------------------------------
 void debug_unregister (debug_info_t * id);
 
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index dccf052..ac706c1 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -7,7 +7,7 @@
 EXTRA_CFLAGS	:= -Werror -Wno-sign-compare
 
 obj-y    := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
-	    irq_alpha.o signal.o setup.o ptrace.o time.o semaphore.o \
+	    irq_alpha.o signal.o setup.o ptrace.o time.o \
 	    alpha_ksyms.o systbls.o err_common.o io.o
 
 obj-$(CONFIG_VGA_HOSE)	+= console.o
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index e9762a3..d96e742 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -77,15 +77,6 @@
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
 
-/* Semaphore helper functions.  */
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__up_wakeup);
-EXPORT_SYMBOL(down);
-EXPORT_SYMBOL(down_interruptible);
-EXPORT_SYMBOL(down_trylock);
-EXPORT_SYMBOL(up);
-
 /* 
  * SMP-specific symbols.
  */
diff --git a/arch/alpha/kernel/semaphore.c b/arch/alpha/kernel/semaphore.c
deleted file mode 100644
index 8d2982a..0000000
--- a/arch/alpha/kernel/semaphore.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Alpha semaphore implementation.
- *
- * (C) Copyright 1996 Linus Torvalds
- * (C) Copyright 1999, 2000 Richard Henderson
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-
-/*
- * This is basically the PPC semaphore scheme ported to use
- * the Alpha ll/sc sequences, so see the PPC code for
- * credits.
- */
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- *	old_count = sem->count;
- *	tmp = MAX(old_count, 0) + incr;
- *	sem->count = tmp;
- *	return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	long old_count, tmp = 0;
-
-	__asm__ __volatile__(
-	"1:	ldl_l	%0,%2\n"
-	"	cmovgt	%0,%0,%1\n"
-	"	addl	%1,%3,%1\n"
-	"	stl_c	%1,%2\n"
-	"	beq	%1,2f\n"
-	"	mb\n"
-	".subsection 2\n"
-	"2:	br	1b\n"
-	".previous"
-	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
-	: "Ir" (incr), "1" (tmp), "m" (sem->count));
-
-	return old_count;
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- */
-
-void __sched
-__down_failed(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down failed(%p)\n",
-	       tsk->comm, task_pid_nr(tsk), sem);
-#endif
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	wmb();
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	/*
-	 * Try to get the semaphore.  If the count is > 0, then we've
-	 * got the semaphore; we decrement count and exit the loop.
-	 * If the count is 0 or negative, we set it to -1, indicating
-	 * that we are asleep, and then sleep.
-	 */
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-
-	/*
-	 * If there are any more sleepers, wake one of them up so
-	 * that it can either get the semaphore, or set count to -1
-	 * indicating that there are still processes sleeping.
-	 */
-	wake_up(&sem->wait);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down acquired(%p)\n",
-	       tsk->comm, task_pid_nr(tsk), sem);
-#endif
-}
-
-int __sched
-__down_failed_interruptible(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	long ret = 0;
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down failed(%p)\n",
-	       tsk->comm, task_pid_nr(tsk), sem);
-#endif
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	wmb();
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			/*
-			 * A signal is pending - give up trying.
-			 * Set sem->count to 0 if it is negative,
-			 * since we are no longer sleeping.
-			 */
-			__sem_update_count(sem, 0);
-			ret = -EINTR;
-			break;
-		}
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-	wake_up(&sem->wait);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down %s(%p)\n",
-	       current->comm, task_pid_nr(current),
-	       (ret < 0 ? "interrupted" : "acquired"), sem);
-#endif
-	return ret;
-}
-
-void
-__up_wakeup(struct semaphore *sem)
-{
-	/*
-	 * Note that we incremented count in up() before we came here,
-	 * but that was ineffective since the result was <= 0, and
-	 * any negative value of count is equivalent to 0.
-	 * This ends up setting count to 1, unless count is now > 0
-	 * (i.e. because some other cpu has called up() in the meantime),
-	 * in which case we just increment count.
-	 */
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-
-void __sched
-down(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down(%p) <count=%d> from %p\n",
-	       current->comm, task_pid_nr(current), sem,
-	       atomic_read(&sem->count), __builtin_return_address(0));
-#endif
-	__down(sem);
-}
-
-int __sched
-down_interruptible(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down(%p) <count=%d> from %p\n",
-	       current->comm, task_pid_nr(current), sem,
-	       atomic_read(&sem->count), __builtin_return_address(0));
-#endif
-	return __down_interruptible(sem);
-}
-
-int
-down_trylock(struct semaphore *sem)
-{
-	int ret;
-
-#ifdef WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	ret = __down_trylock(sem);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down_trylock %s from %p\n",
-	       current->comm, task_pid_nr(current),
-	       ret ? "failed" : "acquired",
-	       __builtin_return_address(0));
-#endif
-
-	return ret;
-}
-
-void
-up(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): up(%p) <count=%d> from %p\n",
-	       current->comm, task_pid_nr(current), sem,
-	       atomic_read(&sem->count), __builtin_return_address(0));
-#endif
-	__up(sem);
-}
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 00d44c6..6235f72 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -7,7 +7,7 @@
 # Object file lists.
 
 obj-y		:= compat.o entry-armv.o entry-common.o irq.o \
-		   process.o ptrace.o semaphore.o setup.o signal.o \
+		   process.o ptrace.o setup.o signal.o \
 		   sys_arm.o stacktrace.o time.o traps.o
 
 obj-$(CONFIG_ISA_DMA_API)	+= dma.o
diff --git a/arch/arm/kernel/semaphore.c b/arch/arm/kernel/semaphore.c
deleted file mode 100644
index 981fe5c..0000000
--- a/arch/arm/kernel/semaphore.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- *  ARM semaphore implementation, taken from
- *
- *  i386 semaphore implementation.
- *
- *  (C) Copyright 1999 Linus Torvalds
- *
- *  Modified for ARM by Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is
- * protected by the semaphore spinlock.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-static DEFINE_SPINLOCK(semaphore_lock);
-
-void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	spin_lock_irq(&semaphore_lock);
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irq(&semaphore_lock);
-
-		schedule();
-		tsk->state = TASK_UNINTERRUPTIBLE;
-		spin_lock_irq(&semaphore_lock);
-	}
-	spin_unlock_irq(&semaphore_lock);
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-	wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	tsk->state = TASK_INTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	spin_lock_irq(&semaphore_lock);
-	sem->sleepers ++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock. The
-		 * "-1" is because we're still hoping to get
-		 * the lock.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irq(&semaphore_lock);
-
-		schedule();
-		tsk->state = TASK_INTERRUPTIBLE;
-		spin_lock_irq(&semaphore_lock);
-	}
-	spin_unlock_irq(&semaphore_lock);
-	tsk->state = TASK_RUNNING;
-	remove_wait_queue(&sem->wait, &wait);
-	wake_up(&sem->wait);
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-int __down_trylock(struct semaphore * sem)
-{
-	int sleepers;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count))
-		wake_up(&sem->wait);
-
-	spin_unlock_irqrestore(&semaphore_lock, flags);
-	return 1;
-}
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * ip contains the semaphore pointer on entry. Save the C-clobbered
- * registers (r0 to r3 and lr), but not ip, as we use it as a return
- * value in some cases..
- * To remain AAPCS compliant (64-bit stack align) we save r4 as well.
- */
-asm("	.section .sched.text,\"ax\",%progbits	\n\
-	.align	5				\n\
-	.globl	__down_failed			\n\
-__down_failed:					\n\
-	stmfd	sp!, {r0 - r4, lr}		\n\
-	mov	r0, ip				\n\
-	bl	__down				\n\
-	ldmfd	sp!, {r0 - r4, pc}		\n\
-						\n\
-	.align	5				\n\
-	.globl	__down_interruptible_failed	\n\
-__down_interruptible_failed:			\n\
-	stmfd	sp!, {r0 - r4, lr}		\n\
-	mov	r0, ip				\n\
-	bl	__down_interruptible		\n\
-	mov	ip, r0				\n\
-	ldmfd	sp!, {r0 - r4, pc}		\n\
-						\n\
-	.align	5				\n\
-	.globl	__down_trylock_failed		\n\
-__down_trylock_failed:				\n\
-	stmfd	sp!, {r0 - r4, lr}		\n\
-	mov	r0, ip				\n\
-	bl	__down_trylock			\n\
-	mov	ip, r0				\n\
-	ldmfd	sp!, {r0 - r4, pc}		\n\
-						\n\
-	.align	5				\n\
-	.globl	__up_wakeup			\n\
-__up_wakeup:					\n\
-	stmfd	sp!, {r0 - r4, lr}		\n\
-	mov	r0, ip				\n\
-	bl	__up				\n\
-	ldmfd	sp!, {r0 - r4, pc}		\n\
-	");
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_interruptible_failed);
-EXPORT_SYMBOL(__down_trylock_failed);
-EXPORT_SYMBOL(__up_wakeup);
diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile
index e4b6d12..18229d0 100644
--- a/arch/avr32/kernel/Makefile
+++ b/arch/avr32/kernel/Makefile
@@ -6,7 +6,7 @@
 
 obj-$(CONFIG_SUBARCH_AVR32B)	+= entry-avr32b.o
 obj-y				+= syscall_table.o syscall-stubs.o irq.o
-obj-y				+= setup.o traps.o semaphore.o ocd.o ptrace.o
+obj-y				+= setup.o traps.o ocd.o ptrace.o
 obj-y				+= signal.o sys_avr32.o process.o time.o
 obj-y				+= init_task.o switch_to.o cpu.o
 obj-$(CONFIG_MODULES)		+= module.o avr32_ksyms.o
diff --git a/arch/avr32/kernel/semaphore.c b/arch/avr32/kernel/semaphore.c
deleted file mode 100644
index 1e2705a..0000000
--- a/arch/avr32/kernel/semaphore.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * AVR32 sempahore implementation.
- *
- * Copyright (C) 2004-2006 Atmel Corporation
- *
- * Based on linux/arch/i386/kernel/semaphore.c
- *  Copyright (C) 1999 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-
-#include <asm/semaphore.h>
-#include <asm/atomic.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__up);
-
-void __sched __down(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-        DECLARE_WAITQUEUE(wait, tsk);
-        unsigned long flags;
-
-        tsk->state = TASK_UNINTERRUPTIBLE;
-        spin_lock_irqsave(&sem->wait.lock, flags);
-        add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-        sem->sleepers++;
-        for (;;) {
-                int sleepers = sem->sleepers;
-
-                /*
-                 * Add "everybody else" into it. They aren't
-                 * playing, because we own the spinlock in
-                 * the wait_queue_head.
-                 */
-                if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
-                        sem->sleepers = 0;
-                        break;
-                }
-                sem->sleepers = 1;      /* us - see -1 above */
-                spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-                schedule();
-
-                spin_lock_irqsave(&sem->wait.lock, flags);
-                tsk->state = TASK_UNINTERRUPTIBLE;
-        }
-        remove_wait_queue_locked(&sem->wait, &wait);
-        wake_up_locked(&sem->wait);
-        spin_unlock_irqrestore(&sem->wait.lock, flags);
-        tsk->state = TASK_RUNNING;
-}
-EXPORT_SYMBOL(__down);
-
-int __sched __down_interruptible(struct semaphore *sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-        DECLARE_WAITQUEUE(wait, tsk);
-        unsigned long flags;
-
-        tsk->state = TASK_INTERRUPTIBLE;
-        spin_lock_irqsave(&sem->wait.lock, flags);
-        add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-        sem->sleepers++;
-        for (;;) {
-                int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into the trylock
-		 * failure case - we won't be sleeping, and we can't
-		 * get the lock as it has contention. Just correct the
-		 * count and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-                /*
-                 * Add "everybody else" into it. They aren't
-                 * playing, because we own the spinlock in
-                 * the wait_queue_head.
-                 */
-                if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
-                        sem->sleepers = 0;
-                        break;
-                }
-                sem->sleepers = 1;      /* us - see -1 above */
-                spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-                schedule();
-
-                spin_lock_irqsave(&sem->wait.lock, flags);
-                tsk->state = TASK_INTERRUPTIBLE;
-        }
-        remove_wait_queue_locked(&sem->wait, &wait);
-        wake_up_locked(&sem->wait);
-        spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-        tsk->state = TASK_RUNNING;
-	return retval;
-}
-EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 589c6ac..2dd1f30 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -31,10 +31,6 @@
 	bool
 	default y
 
-config SEMAPHORE_SLEEPERS
-	bool
-	default y
-
 config GENERIC_FIND_NEXT_BIT
 	bool
 	default y
diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c
index 0bfbb26..053edff 100644
--- a/arch/blackfin/kernel/bfin_ksyms.c
+++ b/arch/blackfin/kernel/bfin_ksyms.c
@@ -42,11 +42,6 @@
 
 EXPORT_SYMBOL(kernel_thread);
 
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__down_interruptible);
-
 EXPORT_SYMBOL(is_in_rom);
 EXPORT_SYMBOL(bfin_return_from_exception);
 
diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile
index c8e8ea5..ee7bcd4 100644
--- a/arch/cris/kernel/Makefile
+++ b/arch/cris/kernel/Makefile
@@ -5,8 +5,7 @@
 
 extra-y	:= vmlinux.lds
 
-obj-y   := process.o traps.o irq.o ptrace.o setup.o \
-	   time.o sys_cris.o semaphore.o
+obj-y   := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
 
 obj-$(CONFIG_MODULES)    += crisksyms.o
 obj-$(CONFIG_MODULES)	 += module.o
diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index 62f0e75..7ac000f 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c
@@ -9,7 +9,6 @@
 #include <linux/string.h>
 #include <linux/tty.h>
 
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
@@ -49,12 +48,6 @@
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 
-/* Semaphore functions */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-
 /* Userspace access functions */
 EXPORT_SYMBOL(__copy_user_zeroing);
 EXPORT_SYMBOL(__copy_user);
diff --git a/arch/cris/kernel/semaphore.c b/arch/cris/kernel/semaphore.c
deleted file mode 100644
index f137a439..0000000
--- a/arch/cris/kernel/semaphore.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- *  Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <asm/semaphore-helper.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-#define DOWN_VAR				\
-	struct task_struct *tsk = current;	\
-	wait_queue_t wait;			\
-	init_waitqueue_entry(&wait, tsk);
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	tsk->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		tsk->state = (task_state);	\
-	}					\
-	tsk->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DOWN_VAR
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-	DOWN_VAR
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, tsk);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}
diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile
index e8f73ed..c36f70b 100644
--- a/arch/frv/kernel/Makefile
+++ b/arch/frv/kernel/Makefile
@@ -9,7 +9,7 @@
 
 obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
 	 kernel_execve.o process.o traps.o ptrace.o signal.o dma.o \
-	 sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
+	 sys_frv.o time.o setup.o frv_ksyms.o \
 	 debug-stub.o irq.o sleep.o uaccess.o
 
 obj-$(CONFIG_GDBSTUB)		+= gdb-stub.o gdb-io.o
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c
index f772704..0316b3c 100644
--- a/arch/frv/kernel/frv_ksyms.c
+++ b/arch/frv/kernel/frv_ksyms.c
@@ -12,7 +12,6 @@
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/hardirq.h>
 #include <asm/cacheflush.h>
diff --git a/arch/frv/kernel/semaphore.c b/arch/frv/kernel/semaphore.c
deleted file mode 100644
index 7ee3a14..0000000
--- a/arch/frv/kernel/semaphore.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/* semaphore.c: FR-V semaphores
- *
- * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- * - Derived from lib/rwsem-spinlock.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <asm/semaphore.h>
-
-struct sem_waiter {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-void semtrace(struct semaphore *sem, const char *str)
-{
-	if (sem->debug)
-		printk("[%d] %s({%d,%d})\n",
-		       current->pid,
-		       str,
-		       sem->counter,
-		       list_empty(&sem->wait_list) ? 0 : 1);
-}
-#else
-#define semtrace(SEM,STR) do { } while(0)
-#endif
-
-/*
- * wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __down(struct semaphore *sem, unsigned long flags)
-{
-	struct task_struct *tsk = current;
-	struct sem_waiter waiter;
-
-	semtrace(sem, "Entering __down");
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	get_task_struct(tsk);
-
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we don't need to touch the semaphore struct anymore */
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	/* wait to be given the semaphore */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
-	for (;;) {
-		if (list_empty(&waiter.list))
-			break;
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-
-	tsk->state = TASK_RUNNING;
-	semtrace(sem, "Leaving __down");
-}
-
-EXPORT_SYMBOL(__down);
-
-/*
- * interruptibly wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-int __down_interruptible(struct semaphore *sem, unsigned long flags)
-{
-	struct task_struct *tsk = current;
-	struct sem_waiter waiter;
-	int ret;
-
-	semtrace(sem,"Entering __down_interruptible");
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	get_task_struct(tsk);
-
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we don't need to touch the semaphore struct anymore */
-	set_task_state(tsk, TASK_INTERRUPTIBLE);
-
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	/* wait to be given the semaphore */
-	ret = 0;
-	for (;;) {
-		if (list_empty(&waiter.list))
-			break;
-		if (unlikely(signal_pending(current)))
-			goto interrupted;
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-
- out:
-	tsk->state = TASK_RUNNING;
-	semtrace(sem, "Leaving __down_interruptible");
-	return ret;
-
- interrupted:
-	spin_lock_irqsave(&sem->wait_lock, flags);
-
-	if (!list_empty(&waiter.list)) {
-		list_del(&waiter.list);
-		ret = -EINTR;
-	}
-
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-	if (ret == -EINTR)
-		put_task_struct(current);
-	goto out;
-}
-
-EXPORT_SYMBOL(__down_interruptible);
-
-/*
- * release a single token back to a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __up(struct semaphore *sem)
-{
-	struct task_struct *tsk;
-	struct sem_waiter *waiter;
-
-	semtrace(sem,"Entering __up");
-
-	/* grant the token to the process at the front of the queue */
-	waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
-
-	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
-	 * It is allocated on the waiter's stack and may become invalid at
-	 * any time after that point (due to a wakeup from another source).
-	 */
-	list_del_init(&waiter->list);
-	tsk = waiter->task;
-	mb();
-	waiter->task = NULL;
-	wake_up_process(tsk);
-	put_task_struct(tsk);
-
-	semtrace(sem,"Leaving __up");
-}
-
-EXPORT_SYMBOL(__up);
diff --git a/arch/h8300/kernel/Makefile b/arch/h8300/kernel/Makefile
index 874f6ae..6c248c3 100644
--- a/arch/h8300/kernel/Makefile
+++ b/arch/h8300/kernel/Makefile
@@ -5,7 +5,7 @@
 extra-y := vmlinux.lds
 
 obj-y := process.o traps.o ptrace.o irq.o \
-	 sys_h8300.o time.o semaphore.o signal.o \
+	 sys_h8300.o time.o signal.o \
          setup.o gpio.o init_task.o syscalls.o \
 	 entry.o
 
diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c
index d1b1526..6866bd9 100644
--- a/arch/h8300/kernel/h8300_ksyms.c
+++ b/arch/h8300/kernel/h8300_ksyms.c
@@ -12,7 +12,6 @@
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/current.h>
 #include <asm/gpio.h>
diff --git a/arch/h8300/kernel/semaphore.c b/arch/h8300/kernel/semaphore.c
deleted file mode 100644
index d12cbbf..0000000
--- a/arch/h8300/kernel/semaphore.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *  Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/semaphore-helper.h>
-
-#ifndef CONFIG_RMW_INSNS
-spinlock_t semaphore_wake_lock;
-#endif
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	current->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		current->state = (task_state);	\
-	}					\
-	current->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int ret = 0;
-
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, current);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 33e5a59..13fd10e 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -6,7 +6,7 @@
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
 	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
-	 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
 	 unwind.o mca.o mca_asm.o topology.o
 
 obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 8e7193d..6da1f20 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -19,12 +19,6 @@
 EXPORT_SYMBOL(ip_fast_csum);		/* hand-coded assembly */
 EXPORT_SYMBOL(csum_ipv6_magic);
 
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
-
 #include <asm/page.h>
 EXPORT_SYMBOL(clear_page);
 
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
deleted file mode 100644
index 2724ef3..0000000
--- a/arch/ia64/kernel/semaphore.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * IA-64 semaphore implementation (derived from x86 version).
- *
- * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-/*
- * Semaphores are implemented using a two-way counter: The "count"
- * variable is decremented for each process that tries to acquire the
- * semaphore, while the "sleepers" variable is a count of such
- * acquires.
- *
- * Notably, the inline "up()" and "down()" functions can efficiently
- * test if they need to do any extra work (up needs to do something
- * only if count was negative before the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-
-/*
- * Logic:
- *  - Only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - When we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleepers" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void
-__up (struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-void __sched __down (struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * the wait_queue_head.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible (struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers ++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * wait_queue_head. The "-1" is because we're
-		 * still hoping to get the semaphore.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-	tsk->state = TASK_RUNNING;
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for having decremented the
- * count.
- */
-int
-__down_trylock (struct semaphore *sem)
-{
-	unsigned long flags;
-	int sleepers;
-
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock in the
-	 * wait_queue_head.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count)) {
-		wake_up_locked(&sem->wait);
-	}
-
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	return 1;
-}
diff --git a/arch/m32r/kernel/Makefile b/arch/m32r/kernel/Makefile
index e97e26e..09200d4 100644
--- a/arch/m32r/kernel/Makefile
+++ b/arch/m32r/kernel/Makefile
@@ -5,7 +5,7 @@
 extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y	:= process.o entry.o traps.o align.o irq.o setup.o time.o \
-	m32r_ksyms.o sys_m32r.o semaphore.o signal.o ptrace.o
+	m32r_ksyms.o sys_m32r.o signal.o ptrace.o
 
 obj-$(CONFIG_SMP)		+= smp.o smpboot.o
 obj-$(CONFIG_MODULES)		+= module.o
diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c
index 41a4c95..e6709fe 100644
--- a/arch/m32r/kernel/m32r_ksyms.c
+++ b/arch/m32r/kernel/m32r_ksyms.c
@@ -7,7 +7,6 @@
 #include <linux/interrupt.h>
 #include <linux/string.h>
 
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
@@ -22,10 +21,6 @@
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down_trylock);
 
 /* Networking helper routines. */
 /* Delay loops */
diff --git a/arch/m32r/kernel/semaphore.c b/arch/m32r/kernel/semaphore.c
deleted file mode 100644
index 940c2d3..0000000
--- a/arch/m32r/kernel/semaphore.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- *  linux/arch/m32r/semaphore.c
- *    orig : i386 2.6.4
- *
- *  M32R semaphore implementation.
- *
- *	Copyright (c) 2002 - 2004 Hitoshi Yamamoto
- */
-
-/*
- * i386 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-asmlinkage void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-asmlinkage void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * the wait_queue_head.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	tsk->state = TASK_RUNNING;
-}
-
-asmlinkage int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * wait_queue_head. The "-1" is because we're
-		 * still hoping to get the semaphore.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-	tsk->state = TASK_RUNNING;
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-asmlinkage int __down_trylock(struct semaphore * sem)
-{
-	int sleepers;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock in the
-	 * wait_queue_head.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count)) {
-		wake_up_locked(&sem->wait);
-	}
-
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	return 1;
-}
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index a806208..7a62a71 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -10,7 +10,7 @@
 extra-y	+= vmlinux.lds
 
 obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
-	   sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o devres.o
+	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
 
 devres-y = ../../../kernel/irq/devres.o
 
diff --git a/arch/m68k/kernel/m68k_ksyms.c b/arch/m68k/kernel/m68k_ksyms.c
index 6fc69c7..d900e77 100644
--- a/arch/m68k/kernel/m68k_ksyms.c
+++ b/arch/m68k/kernel/m68k_ksyms.c
@@ -1,5 +1,4 @@
 #include <linux/module.h>
-#include <asm/semaphore.h>
 
 asmlinkage long long __ashldi3 (long long, int);
 asmlinkage long long __ashrdi3 (long long, int);
@@ -15,8 +14,3 @@
 EXPORT_SYMBOL(__lshrdi3);
 EXPORT_SYMBOL(__muldi3);
 
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
diff --git a/arch/m68k/kernel/semaphore.c b/arch/m68k/kernel/semaphore.c
deleted file mode 100644
index d12cbbf..0000000
--- a/arch/m68k/kernel/semaphore.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *  Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/semaphore-helper.h>
-
-#ifndef CONFIG_RMW_INSNS
-spinlock_t semaphore_wake_lock;
-#endif
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	current->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		current->state = (task_state);	\
-	}					\
-	current->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int ret = 0;
-
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, current);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}
diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile
index 6bbf19f..a18af09 100644
--- a/arch/m68k/lib/Makefile
+++ b/arch/m68k/lib/Makefile
@@ -5,4 +5,4 @@
 EXTRA_AFLAGS := -traditional
 
 lib-y	:= ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
-	   checksum.o string.o semaphore.o uaccess.o
+	   checksum.o string.o uaccess.o
diff --git a/arch/m68k/lib/semaphore.S b/arch/m68k/lib/semaphore.S
deleted file mode 100644
index 0215624c..0000000
--- a/arch/m68k/lib/semaphore.S
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  linux/arch/m68k/lib/semaphore.S
- *
- *  Copyright (C) 1996  Linus Torvalds
- *
- *  m68k version by Andreas Schwab
- */
-
-#include <linux/linkage.h>
-#include <asm/semaphore.h>
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- */
-ENTRY(__down_failed)
-	moveml %a0/%d0/%d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __down
-	movel (%sp)+,%a1
-	moveml (%sp)+,%a0/%d0/%d1
-	rts
-
-ENTRY(__down_failed_interruptible)
-	movel %a0,-(%sp)
-	movel %d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __down_interruptible
-	movel (%sp)+,%a1
-	movel (%sp)+,%d1
-	movel (%sp)+,%a0
-	rts
-
-ENTRY(__down_failed_trylock)
-	movel %a0,-(%sp)
-	movel %d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __down_trylock
-	movel (%sp)+,%a1
-	movel (%sp)+,%d1
-	movel (%sp)+,%a0
-	rts
-
-ENTRY(__up_wakeup)
-	moveml %a0/%d0/%d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __up
-	movel (%sp)+,%a1
-	moveml (%sp)+,%a0/%d0/%d1
-	rts
-
diff --git a/arch/m68knommu/kernel/Makefile b/arch/m68knommu/kernel/Makefile
index 1524b39..f0eab3d 100644
--- a/arch/m68knommu/kernel/Makefile
+++ b/arch/m68knommu/kernel/Makefile
@@ -5,7 +5,7 @@
 extra-y := vmlinux.lds
 
 obj-y += dma.o entry.o init_task.o irq.o m68k_ksyms.o process.o ptrace.o \
-	 semaphore.o setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
+	 setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
 
 obj-$(CONFIG_MODULES)	+= module.o
 obj-$(CONFIG_COMEMPCI)	+= comempci.o
diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c
index 53fad14..39fe0a7 100644
--- a/arch/m68knommu/kernel/m68k_ksyms.c
+++ b/arch/m68knommu/kernel/m68k_ksyms.c
@@ -13,7 +13,6 @@
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/current.h>
 
@@ -39,11 +38,6 @@
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
 
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
 /*
  * libgcc functions - functions that are used internally by the
  * compiler...  (prototypes are not correct though, but that
diff --git a/arch/m68knommu/kernel/semaphore.c b/arch/m68knommu/kernel/semaphore.c
deleted file mode 100644
index bce2bc7..0000000
--- a/arch/m68knommu/kernel/semaphore.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- *  Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <asm/semaphore-helper.h>
-
-#ifndef CONFIG_RMW_INSNS
-spinlock_t semaphore_wake_lock;
-#endif
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	current->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		current->state = (task_state);	\
-	}					\
-	current->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int ret = 0;
-
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, current);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}
diff --git a/arch/m68knommu/lib/Makefile b/arch/m68knommu/lib/Makefile
index e051a79..d94d709 100644
--- a/arch/m68knommu/lib/Makefile
+++ b/arch/m68knommu/lib/Makefile
@@ -4,4 +4,4 @@
 
 lib-y	:= ashldi3.o ashrdi3.o lshrdi3.o \
 	   muldi3.o mulsi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \
-	   checksum.o semaphore.o memcpy.o memset.o delay.o
+	   checksum.o memcpy.o memset.o delay.o
diff --git a/arch/m68knommu/lib/semaphore.S b/arch/m68knommu/lib/semaphore.S
deleted file mode 100644
index 87c7460..0000000
--- a/arch/m68knommu/lib/semaphore.S
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *  linux/arch/m68k/lib/semaphore.S
- *
- *  Copyright (C) 1996  Linus Torvalds
- *
- *  m68k version by Andreas Schwab
- *
- *  MAR/1999 -- modified to support ColdFire (gerg@snapgear.com)
- */
-
-#include <linux/linkage.h>
-#include <asm/semaphore.h>
-
-/*
- * "down_failed" is called with the eventual return address
- * in %a0, and the address of the semaphore in %a1. We need
- * to increment the number of waiters on the semaphore,
- * call "__down()", and then eventually return to try again.
- */
-ENTRY(__down_failed)
-#ifdef CONFIG_COLDFIRE
-	subl #12,%sp
-	moveml %a0/%d0/%d1,(%sp)
-#else
-	moveml %a0/%d0/%d1,-(%sp)
-#endif
-	movel %a1,-(%sp)
-	jbsr __down
-	movel (%sp)+,%a1
-	movel (%sp)+,%d0
-	movel (%sp)+,%d1
-	rts
-
-ENTRY(__down_failed_interruptible)
-	movel %a0,-(%sp)
-	movel %d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __down_interruptible
-	movel (%sp)+,%a1
-	movel (%sp)+,%d1
-	rts
-
-ENTRY(__up_wakeup)
-#ifdef CONFIG_COLDFIRE
-	subl #12,%sp
-	moveml %a0/%d0/%d1,(%sp)
-#else
-	moveml %a0/%d0/%d1,-(%sp)
-#endif
-	movel %a1,-(%sp)
-	jbsr __up
-	movel (%sp)+,%a1
-	movel (%sp)+,%d0
-	movel (%sp)+,%d1
-	rts
-
-ENTRY(__down_failed_trylock)
-	movel %a0,-(%sp)
-	movel %d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __down_trylock
-	movel (%sp)+,%a1
-	movel (%sp)+,%d1
-	movel (%sp)+,%a0
-	rts
-
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 9e78e1a..6fcdb6f 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -5,7 +5,7 @@
 extra-y		:= head.o init_task.o vmlinux.lds
 
 obj-y		+= cpu-probe.o branch.o entry.o genex.o irq.o process.o \
-		   ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \
+		   ptrace.o reset.o setup.o signal.o syscall.o \
 		   time.o topology.o traps.o unaligned.o
 
 obj-$(CONFIG_CEVT_BCM1480)	+= cevt-bcm1480.o
diff --git a/arch/mips/kernel/semaphore.c b/arch/mips/kernel/semaphore.c
deleted file mode 100644
index 1265358..0000000
--- a/arch/mips/kernel/semaphore.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * MIPS-specific semaphore code.
- *
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- * Copyright (C) 2004 Ralf Baechle <ralf@linux-mips.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
- * to eliminate the SMP races in the old version between the updates
- * of `count' and `waking'.  Now we use negative `count' values to
- * indicate that some process(es) are waiting for the semaphore.
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/atomic.h>
-#include <asm/cpu-features.h>
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-#include <asm/war.h>
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- *	old_count = sem->count;
- *	tmp = MAX(old_count, 0) + incr;
- *	sem->count = tmp;
- *	return old_count;
- *
- * On machines without lld/scd we need a spinlock to make the manipulation of
- * sem->count and sem->waking atomic.  Scalability isn't an issue because
- * this lock is used on UP only so it's just an empty variable.
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	int old_count, tmp;
-
-	if (cpu_has_llsc && R10000_LLSC_WAR) {
-		__asm__ __volatile__(
-		"	.set	mips3					\n"
-		"1:	ll	%0, %2		# __sem_update_count	\n"
-		"	sra	%1, %0, 31				\n"
-		"	not	%1					\n"
-		"	and	%1, %0, %1				\n"
-		"	addu	%1, %1, %3				\n"
-		"	sc	%1, %2					\n"
-		"	beqzl	%1, 1b					\n"
-		"	.set	mips0					\n"
-		: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
-		: "r" (incr), "m" (sem->count));
-	} else if (cpu_has_llsc) {
-		__asm__ __volatile__(
-		"	.set	mips3					\n"
-		"1:	ll	%0, %2		# __sem_update_count	\n"
-		"	sra	%1, %0, 31				\n"
-		"	not	%1					\n"
-		"	and	%1, %0, %1				\n"
-		"	addu	%1, %1, %3				\n"
-		"	sc	%1, %2					\n"
-		"	beqz	%1, 1b					\n"
-		"	.set	mips0					\n"
-		: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
-		: "r" (incr), "m" (sem->count));
-	} else {
-		static DEFINE_SPINLOCK(semaphore_lock);
-		unsigned long flags;
-
-		spin_lock_irqsave(&semaphore_lock, flags);
-		old_count = atomic_read(&sem->count);
-		tmp = max_t(int, old_count, 0) + incr;
-		atomic_set(&sem->count, tmp);
-		spin_unlock_irqrestore(&semaphore_lock, flags);
-	}
-
-	return old_count;
-}
-
-void __up(struct semaphore *sem)
-{
-	/*
-	 * Note that we incremented count in up() before we came here,
-	 * but that was ineffective since the result was <= 0, and
-	 * any negative value of count is equivalent to 0.
-	 * This ends up setting count to 1, unless count is now > 0
-	 * (i.e. because some other cpu has called up() in the meantime),
-	 * in which case we just increment count.
-	 */
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-
-EXPORT_SYMBOL(__up);
-
-/*
- * Note that when we come in to __down or __down_interruptible,
- * we have already decremented count, but that decrement was
- * ineffective since the result was < 0, and any negative value
- * of count is equivalent to 0.
- * Thus it is only when we decrement count from some value > 0
- * that we have actually got the semaphore.
- */
-void __sched __down(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	/*
-	 * Try to get the semaphore.  If the count is > 0, then we've
-	 * got the semaphore; we decrement count and exit the loop.
-	 * If the count is 0 or negative, we set it to -1, indicating
-	 * that we are asleep, and then sleep.
-	 */
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-
-	/*
-	 * If there are any more sleepers, wake one of them up so
-	 * that it can either get the semaphore, or set count to -1
-	 * indicating that there are still processes sleeping.
-	 */
-	wake_up(&sem->wait);
-}
-
-EXPORT_SYMBOL(__down);
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_INTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			/*
-			 * A signal is pending - give up trying.
-			 * Set sem->count to 0 if it is negative,
-			 * since we are no longer sleeping.
-			 */
-			__sem_update_count(sem, 0);
-			retval = -EINTR;
-			break;
-		}
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-
-	wake_up(&sem->wait);
-	return retval;
-}
-
-EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/mn10300/kernel/Makefile b/arch/mn10300/kernel/Makefile
index ef07c95..23f2ab6 100644
--- a/arch/mn10300/kernel/Makefile
+++ b/arch/mn10300/kernel/Makefile
@@ -3,7 +3,7 @@
 #
 extra-y := head.o init_task.o vmlinux.lds
 
-obj-y   := process.o semaphore.o signal.o entry.o fpu.o traps.o irq.o \
+obj-y   := process.o signal.o entry.o fpu.o traps.o irq.o \
 	   ptrace.o setup.o time.o sys_mn10300.o io.o kthread.o \
 	   switch_to.o mn10300_ksyms.o kernel_execve.o
 
diff --git a/arch/mn10300/kernel/semaphore.c b/arch/mn10300/kernel/semaphore.c
deleted file mode 100644
index 9153c40..0000000
--- a/arch/mn10300/kernel/semaphore.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/* MN10300 Semaphore implementation
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <asm/semaphore.h>
-
-struct sem_waiter {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
-#if SEMAPHORE_DEBUG
-void semtrace(struct semaphore *sem, const char *str)
-{
-	if (sem->debug)
-		printk(KERN_DEBUG "[%d] %s({%d,%d})\n",
-		       current->pid,
-		       str,
-		       atomic_read(&sem->count),
-		       list_empty(&sem->wait_list) ? 0 : 1);
-}
-#else
-#define semtrace(SEM, STR) do { } while (0)
-#endif
-
-/*
- * wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __down(struct semaphore *sem, unsigned long flags)
-{
-	struct task_struct *tsk = current;
-	struct sem_waiter waiter;
-
-	semtrace(sem, "Entering __down");
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	get_task_struct(tsk);
-
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we don't need to touch the semaphore struct anymore */
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	/* wait to be given the semaphore */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
-	for (;;) {
-		if (!waiter.task)
-			break;
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-
-	tsk->state = TASK_RUNNING;
-	semtrace(sem, "Leaving __down");
-}
-EXPORT_SYMBOL(__down);
-
-/*
- * interruptibly wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-int __down_interruptible(struct semaphore *sem, unsigned long flags)
-{
-	struct task_struct *tsk = current;
-	struct sem_waiter waiter;
-	int ret;
-
-	semtrace(sem, "Entering __down_interruptible");
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	get_task_struct(tsk);
-
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we don't need to touch the semaphore struct anymore */
-	set_task_state(tsk, TASK_INTERRUPTIBLE);
-
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	/* wait to be given the semaphore */
-	ret = 0;
-	for (;;) {
-		if (!waiter.task)
-			break;
-		if (unlikely(signal_pending(current)))
-			goto interrupted;
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-
- out:
-	tsk->state = TASK_RUNNING;
-	semtrace(sem, "Leaving __down_interruptible");
-	return ret;
-
- interrupted:
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	list_del(&waiter.list);
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	ret = 0;
-	if (!waiter.task) {
-		put_task_struct(current);
-		ret = -EINTR;
-	}
-	goto out;
-}
-EXPORT_SYMBOL(__down_interruptible);
-
-/*
- * release a single token back to a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __up(struct semaphore *sem)
-{
-	struct task_struct *tsk;
-	struct sem_waiter *waiter;
-
-	semtrace(sem, "Entering __up");
-
-	/* grant the token to the process at the front of the queue */
-	waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
-
-	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
-	 * It is an allocated on the waiter's stack and may become invalid at
-	 * any time after that point (due to a wakeup from another source).
-	 */
-	list_del_init(&waiter->list);
-	tsk = waiter->task;
-	smp_mb();
-	waiter->task = NULL;
-	wake_up_process(tsk);
-	put_task_struct(tsk);
-
-	semtrace(sem, "Leaving __up");
-}
-EXPORT_SYMBOL(__up);
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 27827bc..1f6585a 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -9,7 +9,7 @@
 
 obj-y	     	:= cache.o pacache.o setup.o traps.o time.o irq.o \
 		   pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \
-		   ptrace.o hardware.o inventory.o drivers.o semaphore.o \
+		   ptrace.o hardware.o inventory.o drivers.o \
 		   signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \
 		   process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \
 		   topology.o
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 7aca704..5b7fc4a 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -69,11 +69,6 @@
 EXPORT_SYMBOL(memcpy_fromio);
 EXPORT_SYMBOL(memset_io);
 
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down);
-
 extern void $$divI(void);
 extern void $$divU(void);
 extern void $$remI(void);
diff --git a/arch/parisc/kernel/semaphore.c b/arch/parisc/kernel/semaphore.c
deleted file mode 100644
index ee806bc..0000000
--- a/arch/parisc/kernel/semaphore.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Semaphore implementation Copyright (c) 2001 Matthew Wilcox, Hewlett-Packard
- */
-
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-/*
- * Semaphores are complex as we wish to avoid using two variables.
- * `count' has multiple roles, depending on its value.  If it is positive
- * or zero, there are no waiters.  The functions here will never be
- * called; see <asm/semaphore.h>
- *
- * When count is -1 it indicates there is at least one task waiting
- * for the semaphore.
- *
- * When count is less than that, there are '- count - 1' wakeups
- * pending.  ie if it has value -3, there are 2 wakeups pending.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	sem->count--;
-	wake_up(&sem->wait);
-}
-
-#define wakers(count) (-1 - count)
-
-#define DOWN_HEAD							\
-	int ret = 0;							\
-	DECLARE_WAITQUEUE(wait, current);				\
-									\
-	/* Note that someone is waiting */				\
-	if (sem->count == 0)						\
-		sem->count = -1;					\
-									\
-	/* protected by the sentry still -- use unlocked version */	\
-	wait.flags = WQ_FLAG_EXCLUSIVE;					\
-	__add_wait_queue_tail(&sem->wait, &wait);			\
- lost_race:								\
-	spin_unlock_irq(&sem->sentry);					\
-
-#define DOWN_TAIL							\
-	spin_lock_irq(&sem->sentry);					\
-	if (wakers(sem->count) == 0 && ret == 0)			\
-		goto lost_race;	/* Someone stole our wakeup */		\
-	__remove_wait_queue(&sem->wait, &wait);				\
-	current->state = TASK_RUNNING;					\
-	if (!waitqueue_active(&sem->wait) && (sem->count < 0))		\
-		sem->count = wakers(sem->count);
-
-#define UPDATE_COUNT							\
-	sem->count += (sem->count < 0) ? 1 : - 1;
-	
-
-void __sched __down(struct semaphore * sem)
-{
-	DOWN_HEAD
-
-	for(;;) {
-		set_task_state(current, TASK_UNINTERRUPTIBLE);
-		/* we can _read_ this without the sentry */
-		if (sem->count != -1)
-			break;
- 		schedule();
- 	}
-
-	DOWN_TAIL
-	UPDATE_COUNT
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	DOWN_HEAD
-
-	for(;;) {
-		set_task_state(current, TASK_INTERRUPTIBLE);
-		/* we can _read_ this without the sentry */
-		if (sem->count != -1)
-			break;
-
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-		schedule();
-	}
-
-	DOWN_TAIL
-
-	if (!ret) {
-		UPDATE_COUNT
-	}
-
-	return ret;
-}
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index c1baf9d..b9dbfff 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,7 +12,7 @@
 CFLAGS_btext.o		+= -fPIC
 endif
 
-obj-y				:= semaphore.o cputable.o ptrace.o syscalls.o \
+obj-y				:= cputable.o ptrace.o syscalls.o \
 				   irq.o align.o signal_32.o pmc.o vdso.o \
 				   init_task.o process.o systbl.o idle.o \
 				   signal.o
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 9c98424..65d14e6 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -15,7 +15,6 @@
 #include <linux/bitops.h>
 
 #include <asm/page.h>
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/cacheflush.h>
 #include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/semaphore.c b/arch/powerpc/kernel/semaphore.c
deleted file mode 100644
index 2f8c3c9..0000000
--- a/arch/powerpc/kernel/semaphore.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * PowerPC-specific semaphore code.
- *
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
- * to eliminate the SMP races in the old version between the updates
- * of `count' and `waking'.  Now we use negative `count' values to
- * indicate that some process(es) are waiting for the semaphore.
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-#include <asm/atomic.h>
-#include <asm/semaphore.h>
-#include <asm/errno.h>
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- *	old_count = sem->count;
- *	tmp = MAX(old_count, 0) + incr;
- *	sem->count = tmp;
- *	return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	int old_count, tmp;
-
-	__asm__ __volatile__("\n"
-"1:	lwarx	%0,0,%3\n"
-"	srawi	%1,%0,31\n"
-"	andc	%1,%0,%1\n"
-"	add	%1,%1,%4\n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%1,0,%3\n"
-"	bne	1b"
-	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
-	: "r" (&sem->count), "r" (incr), "m" (sem->count)
-	: "cc");
-
-	return old_count;
-}
-
-void __up(struct semaphore *sem)
-{
-	/*
-	 * Note that we incremented count in up() before we came here,
-	 * but that was ineffective since the result was <= 0, and
-	 * any negative value of count is equivalent to 0.
-	 * This ends up setting count to 1, unless count is now > 0
-	 * (i.e. because some other cpu has called up() in the meantime),
-	 * in which case we just increment count.
-	 */
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__up);
-
-/*
- * Note that when we come in to __down or __down_interruptible,
- * we have already decremented count, but that decrement was
- * ineffective since the result was < 0, and any negative value
- * of count is equivalent to 0.
- * Thus it is only when we decrement count from some value > 0
- * that we have actually got the semaphore.
- */
-void __sched __down(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	/*
-	 * Try to get the semaphore.  If the count is > 0, then we've
-	 * got the semaphore; we decrement count and exit the loop.
-	 * If the count is 0 or negative, we set it to -1, indicating
-	 * that we are asleep, and then sleep.
-	 */
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-
-	/*
-	 * If there are any more sleepers, wake one of them up so
-	 * that it can either get the semaphore, or set count to -1
-	 * indicating that there are still processes sleeping.
-	 */
-	wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__down);
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_INTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			/*
-			 * A signal is pending - give up trying.
-			 * Set sem->count to 0 if it is negative,
-			 * since we are no longer sleeping.
-			 */
-			__sem_update_count(sem, 0);
-			retval = -EINTR;
-			break;
-		}
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-
-	wake_up(&sem->wait);
-	return retval;
-}
-EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/ppc/kernel/semaphore.c b/arch/ppc/kernel/semaphore.c
deleted file mode 100644
index 2fe429b..0000000
--- a/arch/ppc/kernel/semaphore.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * PowerPC-specific semaphore code.
- *
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
- * to eliminate the SMP races in the old version between the updates
- * of `count' and `waking'.  Now we use negative `count' values to
- * indicate that some process(es) are waiting for the semaphore.
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/atomic.h>
-#include <asm/semaphore.h>
-#include <asm/errno.h>
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- *	old_count = sem->count;
- *	tmp = MAX(old_count, 0) + incr;
- *	sem->count = tmp;
- *	return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	int old_count, tmp;
-
-	__asm__ __volatile__("\n"
-"1:	lwarx	%0,0,%3\n"
-"	srawi	%1,%0,31\n"
-"	andc	%1,%0,%1\n"
-"	add	%1,%1,%4\n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%1,0,%3\n"
-"	bne	1b"
-	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
-	: "r" (&sem->count), "r" (incr), "m" (sem->count)
-	: "cc");
-
-	return old_count;
-}
-
-void __up(struct semaphore *sem)
-{
-	/*
-	 * Note that we incremented count in up() before we came here,
-	 * but that was ineffective since the result was <= 0, and
-	 * any negative value of count is equivalent to 0.
-	 * This ends up setting count to 1, unless count is now > 0
-	 * (i.e. because some other cpu has called up() in the meantime),
-	 * in which case we just increment count.
-	 */
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-
-/*
- * Note that when we come in to __down or __down_interruptible,
- * we have already decremented count, but that decrement was
- * ineffective since the result was < 0, and any negative value
- * of count is equivalent to 0.
- * Thus it is only when we decrement count from some value > 0
- * that we have actually got the semaphore.
- */
-void __sched __down(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-	smp_wmb();
-
-	/*
-	 * Try to get the semaphore.  If the count is > 0, then we've
-	 * got the semaphore; we decrement count and exit the loop.
-	 * If the count is 0 or negative, we set it to -1, indicating
-	 * that we are asleep, and then sleep.
-	 */
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-
-	/*
-	 * If there are any more sleepers, wake one of them up so
-	 * that it can either get the semaphore, or set count to -1
-	 * indicating that there are still processes sleeping.
-	 */
-	wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-	smp_wmb();
-
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			/*
-			 * A signal is pending - give up trying.
-			 * Set sem->count to 0 if it is negative,
-			 * since we are no longer sleeping.
-			 */
-			__sem_update_count(sem, 0);
-			retval = -EINTR;
-			break;
-		}
-		schedule();
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	tsk->state = TASK_RUNNING;
-	remove_wait_queue(&sem->wait, &wait);
-	wake_up(&sem->wait);
-	return retval;
-}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1831833..f6a68e1 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -3,6 +3,10 @@
 # see Documentation/kbuild/kconfig-language.txt.
 #
 
+config SCHED_MC
+	def_bool y
+	depends on SMP
+
 config MMU
 	def_bool y
 
@@ -39,6 +43,9 @@
 config GENERIC_TIME
 	def_bool y
 
+config GENERIC_CLOCKEVENTS
+	def_bool y
+
 config GENERIC_BUG
 	bool
 	depends on BUG
@@ -69,6 +76,8 @@
 
 comment "Processor type and features"
 
+source "kernel/time/Kconfig"
+
 config 64BIT
 	bool "64 bit kernel"
 	help
@@ -301,10 +310,7 @@
 	tristate "QDIO support"
 	---help---
 	  This driver provides the Queued Direct I/O base support for
-	  IBM mainframes.
-
-	  For details please refer to the documentation provided by IBM at
-	  <http://www10.software.ibm.com/developerworks/opensource/linux390>
+	  IBM System z.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called qdio.
@@ -486,25 +492,6 @@
 
 source kernel/Kconfig.hz
 
-config NO_IDLE_HZ
-	bool "No HZ timer ticks in idle"
-	help
-	  Switches the regular HZ timer off when the system is going idle.
-	  This helps z/VM to detect that the Linux system is idle. VM can
-	  then "swap-out" this guest which reduces memory usage. It also
-	  reduces the overhead of idle systems.
-
-	  The HZ timer can be switched on/off via /proc/sys/kernel/hz_timer.
-	  hz_timer=0 means HZ timer is disabled. hz_timer=1 means HZ
-	  timer is active.
-
-config NO_IDLE_HZ_INIT
-	bool "HZ timer in idle off by default"
-	depends on NO_IDLE_HZ
-	help
-	  The HZ timer is switched off in idle by default. That means the
-	  HZ timer is already disabled at boot time.
-
 config S390_HYPFS_FS
 	bool "s390 hypervisor file system support"
 	select SYS_HYPERVISOR
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index a3f67f8..e33f32b 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -499,7 +499,7 @@
 	}
 };
 
-static int __init aes_init(void)
+static int __init aes_s390_init(void)
 {
 	int ret;
 
@@ -542,15 +542,15 @@
 	goto out;
 }
 
-static void __exit aes_fini(void)
+static void __exit aes_s390_fini(void)
 {
 	crypto_unregister_alg(&cbc_aes_alg);
 	crypto_unregister_alg(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
 }
 
-module_init(aes_init);
-module_exit(aes_fini);
+module_init(aes_s390_init);
+module_exit(aes_s390_fini);
 
 MODULE_ALIAS("aes");
 
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index ea22707..4aba83b 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -550,7 +550,7 @@
 	}
 };
 
-static int init(void)
+static int des_s390_init(void)
 {
 	int ret = 0;
 
@@ -612,7 +612,7 @@
 	goto out;
 }
 
-static void __exit fini(void)
+static void __exit des_s390_fini(void)
 {
 	crypto_unregister_alg(&cbc_des3_192_alg);
 	crypto_unregister_alg(&ecb_des3_192_alg);
@@ -625,8 +625,8 @@
 	crypto_unregister_alg(&des_alg);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(des_s390_init);
+module_exit(des_s390_fini);
 
 MODULE_ALIAS("des");
 MODULE_ALIAS("des3_ede");
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5a834f6..9cf9eca 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -137,7 +137,7 @@
 	.dia_final	=	sha1_final } }
 };
 
-static int __init init(void)
+static int __init sha1_s390_init(void)
 {
 	if (!crypt_s390_func_available(KIMD_SHA_1))
 		return -EOPNOTSUPP;
@@ -145,13 +145,13 @@
 	return crypto_register_alg(&alg);
 }
 
-static void __exit fini(void)
+static void __exit sha1_s390_fini(void)
 {
 	crypto_unregister_alg(&alg);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(sha1_s390_init);
+module_exit(sha1_s390_fini);
 
 MODULE_ALIAS("sha1");
 
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index ccf8633..2a3d756 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -133,7 +133,7 @@
 	.dia_final	=	sha256_final } }
 };
 
-static int init(void)
+static int sha256_s390_init(void)
 {
 	if (!crypt_s390_func_available(KIMD_SHA_256))
 		return -EOPNOTSUPP;
@@ -141,13 +141,13 @@
 	return crypto_register_alg(&alg);
 }
 
-static void __exit fini(void)
+static void __exit sha256_s390_fini(void)
 {
 	crypto_unregister_alg(&alg);
 }
 
-module_init(init);
-module_exit(fini);
+module_init(sha256_s390_init);
+module_exit(sha256_s390_fini);
 
 MODULE_ALIAS("sha256");
 
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 62f6b5a..dcc3ec2 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -3,6 +3,7 @@
 # Linux kernel version: 2.6.25-rc4
 # Wed Mar  5 11:22:59 2008
 #
+CONFIG_SCHED_MC=y
 CONFIG_MMU=y
 CONFIG_ZONE_DMA=y
 CONFIG_LOCKDEP_SUPPORT=y
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 4d3e383..77051cd 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -11,7 +11,7 @@
 
 obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o \
             setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
-	    semaphore.o s390_ext.o debug.o irq.o ipl.o dis.o diag.o
+	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -19,7 +19,7 @@
 extra-y				+= head.o init_task.o vmlinux.lds
 
 obj-$(CONFIG_MODULES)		+= s390_ksyms.o module.o
-obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_SMP)		+= smp.o topology.o
 
 obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index e89f8c0..20723a0 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -162,4 +162,77 @@
 	compat_sigset_t		uc_sigmask;	/* mask last for extensibility */
 };
 
+struct __sysctl_args32;
+struct stat64_emu31;
+struct mmap_arg_struct_emu31;
+struct fadvise64_64_args;
+struct old_sigaction32;
+struct old_sigaction32;
+
+long sys32_chown16(const char __user * filename, u16 user, u16 group);
+long sys32_lchown16(const char __user * filename, u16 user, u16 group);
+long sys32_fchown16(unsigned int fd, u16 user, u16 group);
+long sys32_setregid16(u16 rgid, u16 egid);
+long sys32_setgid16(u16 gid);
+long sys32_setreuid16(u16 ruid, u16 euid);
+long sys32_setuid16(u16 uid);
+long sys32_setresuid16(u16 ruid, u16 euid, u16 suid);
+long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
+long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid);
+long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
+long sys32_setfsuid16(u16 uid);
+long sys32_setfsgid16(u16 gid);
+long sys32_getgroups16(int gidsetsize, u16 __user *grouplist);
+long sys32_setgroups16(int gidsetsize, u16 __user *grouplist);
+long sys32_getuid16(void);
+long sys32_geteuid16(void);
+long sys32_getgid16(void);
+long sys32_getegid16(void);
+long sys32_ipc(u32 call, int first, int second, int third, u32 ptr);
+long sys32_truncate64(const char __user * path, unsigned long high,
+		      unsigned long low);
+long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low);
+long sys32_sched_rr_get_interval(compat_pid_t pid,
+				 struct compat_timespec __user *interval);
+long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
+			  compat_sigset_t __user *oset, size_t sigsetsize);
+long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
+long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
+long sys32_execve(void);
+long sys32_init_module(void __user *umod, unsigned long len,
+		       const char __user *uargs);
+long sys32_delete_module(const char __user *name_user, unsigned int flags);
+long sys32_gettimeofday(struct compat_timeval __user *tv,
+			struct timezone __user *tz);
+long sys32_settimeofday(struct compat_timeval __user *tv,
+			struct timezone __user *tz);
+long sys32_pause(void);
+long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
+		   u32 poshi, u32 poslo);
+long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
+		    size_t count, u32 poshi, u32 poslo);
+compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count);
+long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
+		    size_t count);
+long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset,
+		      s32 count);
+long sys32_sysctl(struct __sysctl_args32 __user *args);
+long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf);
+long sys32_lstat64(char __user * filename,
+		   struct stat64_emu31 __user * statbuf);
+long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf);
+long sys32_fstatat64(unsigned int dfd, char __user *filename,
+		     struct stat64_emu31 __user* statbuf, int flag);
+unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long sys32_read(unsigned int fd, char __user * buf, size_t count);
+long sys32_write(unsigned int fd, char __user * buf, size_t count);
+long sys32_clone(void);
+long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise);
+long sys32_fadvise64_64(struct fadvise64_64_args __user *args);
+long sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
+		     struct old_sigaction32 __user *oact);
+long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
+			struct sigaction32 __user *oact, size_t sigsetsize);
+long sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss);
 #endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index a5692c4..c7f02e7 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -29,6 +29,7 @@
 #include <asm/lowcore.h>
 #include "compat_linux.h"
 #include "compat_ptrace.h"
+#include "entry.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
@@ -428,6 +429,10 @@
 	/* Default to using normal stack */
 	sp = (unsigned long) A(regs->gprs[15]);
 
+	/* Overflow on alternate signal stack gives SIGSEGV. */
+	if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+		return (void __user *) -1UL;
+
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 		if (! sas_ss_flags(sp))
@@ -461,6 +466,9 @@
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32)))
 		goto give_sigsegv;
 
+	if (frame == (void __user *) -1UL)
+		goto give_sigsegv;
+
 	if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32))
 		goto give_sigsegv;
 
@@ -514,6 +522,9 @@
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32)))
 		goto give_sigsegv;
 
+	if (frame == (void __user *) -1UL)
+		goto give_sigsegv;
+
 	if (copy_siginfo_to_user32(&frame->info, info))
 		goto give_sigsegv;
 
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 1b2f5ce..1e7d4ac 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -73,7 +73,7 @@
 static int debug_open(struct inode *inode, struct file *file);
 static int debug_close(struct inode *inode, struct file *file);
 static debug_info_t*  debug_info_create(char *name, int pages_per_area,
-			int nr_areas, int buf_size);
+			int nr_areas, int buf_size, mode_t mode);
 static void debug_info_get(debug_info_t *);
 static void debug_info_put(debug_info_t *);
 static int debug_prolog_level_fn(debug_info_t * id,
@@ -157,7 +157,7 @@
 };
 
 /* used by dump analysis tools to determine version of debug feature */
-unsigned int debug_feature_version = __DEBUG_FEATURE_VERSION;
+static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
 
 /* static globals */
 
@@ -327,7 +327,8 @@
  */
 
 static debug_info_t*
-debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size)
+debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size,
+		  mode_t mode)
 {
 	debug_info_t* rc;
 
@@ -336,6 +337,8 @@
         if(!rc) 
 		goto out;
 
+	rc->mode = mode & ~S_IFMT;
+
 	/* create root directory */
         rc->debugfs_root_entry = debugfs_create_dir(rc->name,
 					debug_debugfs_root_entry);
@@ -676,23 +679,30 @@
 }
 
 /*
- * debug_register:
- * - creates and initializes debug area for the caller
- * - returns handle for debug area
+ * debug_register_mode:
+ * - Creates and initializes debug area for the caller
+ *   The mode parameter allows to specify access rights for the s390dbf files
+ * - Returns handle for debug area
  */
 
-debug_info_t*
-debug_register (char *name, int pages_per_area, int nr_areas, int buf_size)
+debug_info_t *debug_register_mode(char *name, int pages_per_area, int nr_areas,
+				  int buf_size, mode_t mode, uid_t uid,
+				  gid_t gid)
 {
 	debug_info_t *rc = NULL;
 
+	/* Since debugfs currently does not support uid/gid other than root, */
+	/* we do not allow gid/uid != 0 until we get support for that. */
+	if ((uid != 0) || (gid != 0))
+		printk(KERN_WARNING "debug: Warning - Currently only uid/gid "
+		       "= 0 are supported. Using root as owner now!");
 	if (!initialized)
 		BUG();
 	mutex_lock(&debug_mutex);
 
         /* create new debug_info */
 
-	rc = debug_info_create(name, pages_per_area, nr_areas, buf_size);
+	rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
 	if(!rc) 
 		goto out;
 	debug_register_view(rc, &debug_level_view);
@@ -705,6 +715,20 @@
 	mutex_unlock(&debug_mutex);
 	return rc;
 }
+EXPORT_SYMBOL(debug_register_mode);
+
+/*
+ * debug_register:
+ * - creates and initializes debug area for the caller
+ * - returns handle for debug area
+ */
+
+debug_info_t *debug_register(char *name, int pages_per_area, int nr_areas,
+			     int buf_size)
+{
+	return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
+				   S_IRUSR | S_IWUSR, 0, 0);
+}
 
 /*
  * debug_unregister:
@@ -1073,15 +1097,16 @@
 	int rc = 0;
 	int i;
 	unsigned long flags;
-	mode_t mode = S_IFREG;
+	mode_t mode;
 	struct dentry *pde;
 
 	if (!id)
 		goto out;
-	if (view->prolog_proc || view->format_proc || view->header_proc)
-		mode |= S_IRUSR;
-	if (view->input_proc)
-		mode |= S_IWUSR;
+	mode = (id->mode | S_IFREG) & ~S_IXUGO;
+	if (!(view->prolog_proc || view->format_proc || view->header_proc))
+		mode &= ~(S_IRUSR | S_IRGRP | S_IROTH);
+	if (!view->input_proc)
+		mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
 	pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
 				id , &debug_file_ops);
 	if (!pde){
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 01832c4..540a67f 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -21,6 +21,7 @@
 #include <asm/setup.h>
 #include <asm/cpcmd.h>
 #include <asm/sclp.h>
+#include "entry.h"
 
 /*
  * Create a Kernel NSS if the SAVESYS= parameter is defined
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
new file mode 100644
index 0000000..6b18963
--- /dev/null
+++ b/arch/s390/kernel/entry.h
@@ -0,0 +1,60 @@
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/ptrace.h>
+
+typedef void pgm_check_handler_t(struct pt_regs *, long);
+extern pgm_check_handler_t *pgm_check_table[128];
+pgm_check_handler_t do_protection_exception;
+pgm_check_handler_t do_dat_exception;
+
+extern int sysctl_userprocess_debug;
+
+void do_single_step(struct pt_regs *regs);
+void syscall_trace(struct pt_regs *regs, int entryexit);
+void kernel_stack_overflow(struct pt_regs * regs);
+void do_signal(struct pt_regs *regs);
+int handle_signal32(unsigned long sig, struct k_sigaction *ka,
+		    siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
+
+void do_extint(struct pt_regs *regs, unsigned short code);
+int __cpuinit start_secondary(void *cpuvoid);
+void __init startup_init(void);
+void die(const char * str, struct pt_regs * regs, long err);
+
+struct new_utsname;
+struct mmap_arg_struct;
+struct fadvise64_64_args;
+struct old_sigaction;
+struct sel_arg_struct;
+
+long sys_pipe(unsigned long __user *fildes);
+long sys_mmap2(struct mmap_arg_struct __user  *arg);
+long old_mmap(struct mmap_arg_struct __user *arg);
+long sys_ipc(uint call, int first, unsigned long second,
+	     unsigned long third, void __user *ptr);
+long s390x_newuname(struct new_utsname __user *name);
+long s390x_personality(unsigned long personality);
+long s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
+		    size_t len, int advice);
+long s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, u32 len_low);
+long sys_fork(void);
+long sys_clone(void);
+long sys_vfork(void);
+void execve_tail(void);
+long sys_execve(void);
+int sys_sigsuspend(int history0, int history1, old_sigset_t mask);
+long sys_sigaction(int sig, const struct old_sigaction __user *act,
+		   struct old_sigaction __user *oact);
+long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss);
+long sys_sigreturn(void);
+long sys_rt_sigreturn(void);
+long sys32_sigreturn(void);
+long sys32_rt_sigreturn(void);
+long old_select(struct sel_arg_struct __user *arg);
+long sys_ptrace(long request, long pid, long addr, long data);
+
+#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index efde6e1..cd959c0 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -475,6 +475,7 @@
 pgm_no_vtime:
 #endif
 	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	mvc	SP_ARGS(8,%r15),__LC_LAST_BREAK
 	TRACE_IRQS_OFF
 	lgf	%r3,__LC_PGM_ILC	# load program interruption code
 	lghi	%r8,0x7f
@@ -847,6 +848,7 @@
 	je	0f
 	la	%r1,__LC_SAVE_AREA+32
 0:	mvc	SP_R12(32,%r15),0(%r1)	# move %r12-%r15 to stack
+	mvc	SP_ARGS(8,%r15),__LC_LAST_BREAK
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	jg	kernel_stack_overflow
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 375232c..5325424 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -655,7 +655,7 @@
 
 static struct kset *reipl_kset;
 
-void reipl_run(struct shutdown_trigger *trigger)
+static void reipl_run(struct shutdown_trigger *trigger)
 {
 	struct ccw_dev_id devid;
 	static char buf[100];
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index c5549a2..ed04d13 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -360,7 +360,7 @@
  *	- When the probed function returns, this probe
  *		causes the handlers to fire
  */
-void kretprobe_trampoline_holder(void)
+static void __used kretprobe_trampoline_holder(void)
 {
 	asm volatile(".global kretprobe_trampoline\n"
 		     "kretprobe_trampoline: bcr 0,0\n");
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index ce20315..c1aff19 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -36,6 +36,8 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/utsname.h>
+#include <linux/tick.h>
+#include <linux/elfcore.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -44,6 +46,7 @@
 #include <asm/irq.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
+#include "entry.h"
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
@@ -76,6 +79,7 @@
  * Need to know about CPUs going idle?
  */
 static ATOMIC_NOTIFIER_HEAD(idle_chain);
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
 int register_idle_notifier(struct notifier_block *nb)
 {
@@ -89,9 +93,33 @@
 }
 EXPORT_SYMBOL(unregister_idle_notifier);
 
-void do_monitor_call(struct pt_regs *regs, long interruption_code)
+static int s390_idle_enter(void)
 {
-#ifdef CONFIG_SMP
+	struct s390_idle_data *idle;
+	int nr_calls = 0;
+	void *hcpu;
+	int rc;
+
+	hcpu = (void *)(long)smp_processor_id();
+	rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
+					  &nr_calls);
+	if (rc == NOTIFY_BAD) {
+		nr_calls--;
+		__atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
+					     hcpu, nr_calls, NULL);
+		return rc;
+	}
+	idle = &__get_cpu_var(s390_idle);
+	spin_lock(&idle->lock);
+	idle->idle_count++;
+	idle->in_idle = 1;
+	idle->idle_enter = get_clock();
+	spin_unlock(&idle->lock);
+	return NOTIFY_OK;
+}
+
+void s390_idle_leave(void)
+{
 	struct s390_idle_data *idle;
 
 	idle = &__get_cpu_var(s390_idle);
@@ -99,10 +127,6 @@
 	idle->idle_time += get_clock() - idle->idle_enter;
 	idle->in_idle = 0;
 	spin_unlock(&idle->lock);
-#endif
-	/* disable monitor call class 0 */
-	__ctl_clear_bit(8, 15);
-
 	atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
 				   (void *)(long) smp_processor_id());
 }
@@ -113,61 +137,30 @@
  */
 static void default_idle(void)
 {
-	int cpu, rc;
-	int nr_calls = 0;
-	void *hcpu;
-#ifdef CONFIG_SMP
-	struct s390_idle_data *idle;
-#endif
-
 	/* CPU is going idle. */
-	cpu = smp_processor_id();
-	hcpu = (void *)(long)cpu;
 	local_irq_disable();
 	if (need_resched()) {
 		local_irq_enable();
 		return;
 	}
-
-	rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
-					  &nr_calls);
-	if (rc == NOTIFY_BAD) {
-		nr_calls--;
-		__atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
-					     hcpu, nr_calls, NULL);
+	if (s390_idle_enter() == NOTIFY_BAD) {
 		local_irq_enable();
 		return;
 	}
-
-	/* enable monitor call class 0 */
-	__ctl_set_bit(8, 15);
-
 #ifdef CONFIG_HOTPLUG_CPU
-	if (cpu_is_offline(cpu)) {
+	if (cpu_is_offline(smp_processor_id())) {
 		preempt_enable_no_resched();
 		cpu_die();
 	}
 #endif
-
 	local_mcck_disable();
 	if (test_thread_flag(TIF_MCCK_PENDING)) {
 		local_mcck_enable();
-		/* disable monitor call class 0 */
-		__ctl_clear_bit(8, 15);
-		atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
-					   hcpu);
+		s390_idle_leave();
 		local_irq_enable();
 		s390_handle_mcck();
 		return;
 	}
-#ifdef CONFIG_SMP
-	idle = &__get_cpu_var(s390_idle);
-	spin_lock(&idle->lock);
-	idle->idle_count++;
-	idle->in_idle = 1;
-	idle->idle_enter = get_clock();
-	spin_unlock(&idle->lock);
-#endif
 	trace_hardirqs_on();
 	/* Wait for external, I/O or machine check interrupt. */
 	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
@@ -177,9 +170,10 @@
 void cpu_idle(void)
 {
 	for (;;) {
+		tick_nohz_stop_sched_tick();
 		while (!need_resched())
 			default_idle();
-
+		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
@@ -201,6 +195,7 @@
 	/* Show stack backtrace if pt_regs is from kernel mode */
 	if (!(regs->psw.mask & PSW_MASK_PSTATE))
 		show_trace(NULL, (unsigned long *) regs->gprs[15]);
+	show_last_breaking_event(regs);
 }
 
 extern void kernel_thread_starter(void);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 6e036ba..58a0642 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -41,6 +41,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
+#include "entry.h"
 
 #ifdef CONFIG_COMPAT
 #include "compat_ptrace.h"
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index acf93db..e019b41 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -13,11 +13,12 @@
 #include <linux/errno.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
-
+#include <asm/cpu.h>
 #include <asm/lowcore.h>
 #include <asm/s390_ext.h>
 #include <asm/irq_regs.h>
 #include <asm/irq.h>
+#include "entry.h"
 
 /*
  * ext_int_hash[index] is the start of the list for all external interrupts
@@ -119,13 +120,10 @@
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
-	asm volatile ("mc 0,0");
-	if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
-		/**
-		 * Make sure that the i/o interrupt did not "overtake"
-		 * the last HZ timer interrupt.
-		 */
-		account_ticks(S390_lowcore.int_clock);
+	s390_idle_check();
+	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+		/* Serve timer interrupts first. */
+		clock_comparator_work();
 	kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
         index = ext_hash(code);
 	for (p = ext_int_hash[index]; p; p = p->next) {
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 7234c73..48238a1 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -27,13 +27,6 @@
 EXPORT_SYMBOL(_sb_findmap);
 
 /*
- * semaphore ops
- */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-
-/*
  * binfmt_elf loader 
  */
 extern int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs);
diff --git a/arch/s390/kernel/semaphore.c b/arch/s390/kernel/semaphore.c
deleted file mode 100644
index 191303f..0000000
--- a/arch/s390/kernel/semaphore.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- *  linux/arch/s390/kernel/semaphore.c
- *
- *  S390 version
- *    Copyright (C) 1998-2000 IBM Corporation
- *    Author(s): Martin Schwidefsky
- *
- *  Derived from "linux/arch/i386/kernel/semaphore.c
- *    Copyright (C) 1999, Linus Torvalds
- *
- */
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Atomically update sem->count. Equivalent to:
- *   old_val = sem->count.counter;
- *   new_val = ((old_val >= 0) ? old_val : 0) + incr;
- *   sem->count.counter = new_val;
- *   return old_val;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	int old_val, new_val;
-
-	asm volatile(
-		"	l	%0,0(%3)\n"
-		"0:	ltr	%1,%0\n"
-		"	jhe	1f\n"
-		"	lhi	%1,0\n"
-		"1:	ar	%1,%4\n"
-		"	cs	%0,%1,0(%3)\n"
-		"	jl	0b\n"
-		: "=&d" (old_val), "=&d" (new_val), "=m" (sem->count)
-		: "a" (&sem->count), "d" (incr), "m" (sem->count)
-		: "cc");
-	return old_val;
-}
-
-/*
- * The inline function up() incremented count but the result
- * was <= 0. This indicates that some process is waiting on
- * the semaphore. The semaphore is free and we'll wake the
- * first sleeping process, so we set count to 1 unless some
- * other cpu has called up in the meantime in which case
- * we just increment count by 1.
- */
-void __up(struct semaphore *sem)
-{
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-
-/*
- * The inline function down() decremented count and the result
- * was < 0. The wait loop will atomically test and update the
- * semaphore counter following the rules:
- *   count > 0: decrement count, wake up queue and exit.
- *   count <= 0: set count to -1, go to sleep.
- */
-void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-	wake_up(&sem->wait);
-}
-
-/*
- * Same as __down() with an additional test for signals.
- * If a signal is pending the count is updated as follows:
- *   count > 0: wake up queue and exit.
- *   count <= 0: set count to 0, wake up queue and exit.
- */
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	__set_task_state(tsk, TASK_INTERRUPTIBLE);
-	add_wait_queue_exclusive(&sem->wait, &wait);
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			__sem_update_count(sem, 0);
-			retval = -EINTR;
-			break;
-		}
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	__set_task_state(tsk, TASK_RUNNING);
-	wake_up(&sem->wait);
-	return retval;
-}
-
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 290e504..7141147 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -39,6 +39,7 @@
 #include <linux/pfn.h>
 #include <linux/ctype.h>
 #include <linux/reboot.h>
+#include <linux/topology.h>
 
 #include <asm/ipl.h>
 #include <asm/uaccess.h>
@@ -427,7 +428,7 @@
 	lc->io_new_psw.mask = psw_kernel_bits;
 	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
 	lc->ipl_device = S390_lowcore.ipl_device;
-	lc->jiffy_timer = -1LL;
+	lc->clock_comparator = -1ULL;
 	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
 	lc->async_stack = (unsigned long)
 		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
@@ -687,7 +688,7 @@
 	return S390_lowcore.stfl_fac_list;
 }
 
-static __init int stfle(unsigned long long *list, int doublewords)
+static int __init __stfle(unsigned long long *list, int doublewords)
 {
 	typedef struct { unsigned long long _[doublewords]; } addrtype;
 	register unsigned long __nr asm("0") = doublewords - 1;
@@ -697,6 +698,13 @@
 	return __nr + 1;
 }
 
+int __init stfle(unsigned long long *list, int doublewords)
+{
+	if (!(stfl() & (1UL << 24)))
+		return -EOPNOTSUPP;
+	return __stfle(list, doublewords);
+}
+
 /*
  * Setup hardware capabilities.
  */
@@ -741,7 +749,7 @@
 	 *   HWCAP_S390_DFP bit 6.
 	 */
 	if ((elf_hwcap & (1UL << 2)) &&
-	    stfle(&facility_list_extended, 1) > 0) {
+	    __stfle(&facility_list_extended, 1) > 0) {
 		if (facility_list_extended & (1ULL << (64 - 43)))
 			elf_hwcap |= 1UL << 6;
 	}
@@ -823,6 +831,7 @@
 
         cpu_init();
         __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
+	s390_init_cpu_topology();
 
 	/*
 	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 4449bf3..b976820 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -27,6 +27,7 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/lowcore.h>
+#include "entry.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
@@ -235,6 +236,10 @@
 	/* Default to using normal stack */
 	sp = regs->gprs[15];
 
+	/* Overflow on alternate signal stack gives SIGSEGV. */
+	if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+		return (void __user *) -1UL;
+
 	/* This is the X/Open sanctioned signal stack switching.  */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 		if (! sas_ss_flags(sp))
@@ -270,6 +275,9 @@
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe)))
 		goto give_sigsegv;
 
+	if (frame == (void __user *) -1UL)
+		goto give_sigsegv;
+
 	if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE))
 		goto give_sigsegv;
 
@@ -327,6 +335,9 @@
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe)))
 		goto give_sigsegv;
 
+	if (frame == (void __user *) -1UL)
+		goto give_sigsegv;
+
 	if (copy_siginfo_to_user(&frame->info, info))
 		goto give_sigsegv;
 
@@ -474,11 +485,6 @@
 		int ret;
 #ifdef CONFIG_COMPAT
 		if (test_thread_flag(TIF_31BIT)) {
-			extern int handle_signal32(unsigned long sig,
-						   struct k_sigaction *ka,
-						   siginfo_t *info,
-						   sigset_t *oldset,
-						   struct pt_regs *regs);
 			ret = handle_signal32(signr, &ka, &info, oldset, regs);
 	        }
 		else
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8f894d3..0dfa988 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -44,6 +44,7 @@
 #include <asm/lowcore.h>
 #include <asm/sclp.h>
 #include <asm/cpu.h>
+#include "entry.h"
 
 /*
  * An array with a pointer the lowcore of every CPU.
@@ -67,13 +68,12 @@
 	CPU_STATE_CONFIGURED,
 };
 
-#ifdef CONFIG_HOTPLUG_CPU
-static DEFINE_MUTEX(smp_cpu_state_mutex);
-#endif
+DEFINE_MUTEX(smp_cpu_state_mutex);
+int smp_cpu_polarization[NR_CPUS];
 static int smp_cpu_state[NR_CPUS];
+static int cpu_management;
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
 static void smp_ext_bitcall(int, ec_bit_sig);
 
@@ -298,7 +298,7 @@
 /*
  * this function sends a 'purge tlb' signal to another CPU.
  */
-void smp_ptlb_callback(void *info)
+static void smp_ptlb_callback(void *info)
 {
 	__tlb_flush_local();
 }
@@ -456,6 +456,7 @@
 		if (cpu_known(cpu_id))
 			continue;
 		__cpu_logical_map[logical_cpu] = cpu_id;
+		smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
 		if (!cpu_stopped(logical_cpu))
 			continue;
 		cpu_set(logical_cpu, cpu_present_map);
@@ -489,6 +490,7 @@
 		if (cpu_known(cpu_id))
 			continue;
 		__cpu_logical_map[logical_cpu] = cpu_id;
+		smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
 		cpu_set(logical_cpu, cpu_present_map);
 		if (cpu >= info->configured)
 			smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
@@ -846,6 +848,7 @@
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	current_set[0] = current;
 	smp_cpu_state[0] = CPU_STATE_CONFIGURED;
+	smp_cpu_polarization[0] = POLARIZATION_UNKNWN;
 	spin_lock_init(&(&__get_cpu_var(s390_idle))->lock);
 }
 
@@ -897,15 +900,19 @@
 	case 0:
 		if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
 			rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
-			if (!rc)
+			if (!rc) {
 				smp_cpu_state[cpu] = CPU_STATE_STANDBY;
+				smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+			}
 		}
 		break;
 	case 1:
 		if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
 			rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
-			if (!rc)
+			if (!rc) {
 				smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
+				smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+			}
 		}
 		break;
 	default:
@@ -919,6 +926,34 @@
 static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static ssize_t cpu_polarization_show(struct sys_device *dev, char *buf)
+{
+	int cpu = dev->id;
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	switch (smp_cpu_polarization[cpu]) {
+	case POLARIZATION_HRZ:
+		count = sprintf(buf, "horizontal\n");
+		break;
+	case POLARIZATION_VL:
+		count = sprintf(buf, "vertical:low\n");
+		break;
+	case POLARIZATION_VM:
+		count = sprintf(buf, "vertical:medium\n");
+		break;
+	case POLARIZATION_VH:
+		count = sprintf(buf, "vertical:high\n");
+		break;
+	default:
+		count = sprintf(buf, "unknown\n");
+		break;
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
 static ssize_t show_cpu_address(struct sys_device *dev, char *buf)
 {
 	return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
@@ -931,6 +966,7 @@
 	&attr_configure.attr,
 #endif
 	&attr_address.attr,
+	&attr_polarization.attr,
 	NULL,
 };
 
@@ -1075,11 +1111,48 @@
 out:
 	put_online_cpus();
 	mutex_unlock(&smp_cpu_state_mutex);
+	if (!cpus_empty(newcpus))
+		topology_schedule_update();
 	return rc ? rc : count;
 }
 static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store);
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static ssize_t dispatching_show(struct sys_device *dev, char *buf)
+{
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	count = sprintf(buf, "%d\n", cpu_management);
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+
+static ssize_t dispatching_store(struct sys_device *dev, const char *buf,
+				 size_t count)
+{
+	int val, rc;
+	char delim;
+
+	if (sscanf(buf, "%d %c", &val, &delim) != 1)
+		return -EINVAL;
+	if (val != 0 && val != 1)
+		return -EINVAL;
+	rc = 0;
+	mutex_lock(&smp_cpu_state_mutex);
+	get_online_cpus();
+	if (cpu_management == val)
+		goto out;
+	rc = topology_set_cpu_management(val);
+	if (!rc)
+		cpu_management = val;
+out:
+	put_online_cpus();
+	mutex_unlock(&smp_cpu_state_mutex);
+	return rc ? rc : count;
+}
+static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store);
+
 static int __init topology_init(void)
 {
 	int cpu;
@@ -1093,6 +1166,10 @@
 	if (rc)
 		return rc;
 #endif
+	rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+			       &attr_dispatching.attr);
+	if (rc)
+		return rc;
 	for_each_present_cpu(cpu) {
 		rc = smp_add_present_cpu(cpu);
 		if (rc)
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index fefee99..988d0d6 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -29,8 +29,8 @@
 #include <linux/personality.h>
 #include <linux/unistd.h>
 #include <linux/ipc.h>
-
 #include <asm/uaccess.h>
+#include "entry.h"
 
 /*
  * sys_pipe() is the normal C calling standard for creating
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index cb232c1..7aec676 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -30,7 +30,7 @@
 #include <linux/timex.h>
 #include <linux/notifier.h>
 #include <linux/clocksource.h>
-
+#include <linux/clockchips.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
 #include <asm/s390_ext.h>
@@ -39,6 +39,7 @@
 #include <asm/irq_regs.h>
 #include <asm/timer.h>
 #include <asm/etr.h>
+#include <asm/cio.h>
 
 /* change this if you have some constant time drift */
 #define USECS_PER_JIFFY     ((unsigned long) 1000000/HZ)
@@ -57,16 +58,16 @@
 
 static ext_int_info_t ext_int_info_cc;
 static ext_int_info_t ext_int_etr_cc;
-static u64 init_timer_cc;
 static u64 jiffies_timer_cc;
-static u64 xtime_cc;
+
+static DEFINE_PER_CPU(struct clock_event_device, comparators);
 
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
 unsigned long long sched_clock(void)
 {
-	return ((get_clock() - jiffies_timer_cc) * 125) >> 9;
+	return ((get_clock_xt() - jiffies_timer_cc) * 125) >> 9;
 }
 
 /*
@@ -95,162 +96,40 @@
 #define s390_do_profile()	do { ; } while(0)
 #endif /* CONFIG_PROFILING */
 
-/*
- * Advance the per cpu tick counter up to the time given with the
- * "time" argument. The per cpu update consists of accounting
- * the virtual cpu time, calling update_process_times and calling
- * the profiling hook. If xtime is before time it is advanced as well.
- */
-void account_ticks(u64 time)
+void clock_comparator_work(void)
 {
-	__u32 ticks;
-	__u64 tmp;
+	struct clock_event_device *cd;
 
-	/* Calculate how many ticks have passed. */
-	if (time < S390_lowcore.jiffy_timer)
-		return;
-	tmp = time - S390_lowcore.jiffy_timer;
-	if (tmp >= 2*CLK_TICKS_PER_JIFFY) {  /* more than two ticks ? */
-		ticks = __div(tmp, CLK_TICKS_PER_JIFFY) + 1;
-		S390_lowcore.jiffy_timer +=
-			CLK_TICKS_PER_JIFFY * (__u64) ticks;
-	} else if (tmp >= CLK_TICKS_PER_JIFFY) {
-		ticks = 2;
-		S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY;
-	} else {
-		ticks = 1;
-		S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY;
-	}
-
-#ifdef CONFIG_SMP
-	/*
-	 * Do not rely on the boot cpu to do the calls to do_timer.
-	 * Spread it over all cpus instead.
-	 */
-	write_seqlock(&xtime_lock);
-	if (S390_lowcore.jiffy_timer > xtime_cc) {
-		__u32 xticks;
-		tmp = S390_lowcore.jiffy_timer - xtime_cc;
-		if (tmp >= 2*CLK_TICKS_PER_JIFFY) {
-			xticks = __div(tmp, CLK_TICKS_PER_JIFFY);
-			xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY;
-		} else {
-			xticks = 1;
-			xtime_cc += CLK_TICKS_PER_JIFFY;
-		}
-		do_timer(xticks);
-	}
-	write_sequnlock(&xtime_lock);
-#else
-	do_timer(ticks);
-#endif
-
-	while (ticks--)
-		update_process_times(user_mode(get_irq_regs()));
-
+	S390_lowcore.clock_comparator = -1ULL;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	cd = &__get_cpu_var(comparators);
+	cd->event_handler(cd);
 	s390_do_profile();
 }
 
-#ifdef CONFIG_NO_IDLE_HZ
-
-#ifdef CONFIG_NO_IDLE_HZ_INIT
-int sysctl_hz_timer = 0;
-#else
-int sysctl_hz_timer = 1;
-#endif
-
 /*
- * Stop the HZ tick on the current CPU.
- * Only cpu_idle may call this function.
+ * Fixup the clock comparator.
  */
-static void stop_hz_timer(void)
+static void fixup_clock_comparator(unsigned long long delta)
 {
-	unsigned long flags;
-	unsigned long seq, next;
-	__u64 timer, todval;
-	int cpu = smp_processor_id();
-
-	if (sysctl_hz_timer != 0)
+	/* If nobody is waiting there's nothing to fix. */
+	if (S390_lowcore.clock_comparator == -1ULL)
 		return;
-
-	cpu_set(cpu, nohz_cpu_mask);
-
-	/*
-	 * Leave the clock comparator set up for the next timer
-	 * tick if either rcu or a softirq is pending.
-	 */
-	if (rcu_needs_cpu(cpu) || local_softirq_pending()) {
-		cpu_clear(cpu, nohz_cpu_mask);
-		return;
-	}
-
-	/*
-	 * This cpu is going really idle. Set up the clock comparator
-	 * for the next event.
-	 */
-	next = next_timer_interrupt();
-	do {
-		seq = read_seqbegin_irqsave(&xtime_lock, flags);
-		timer = ((__u64) next) - ((__u64) jiffies) + jiffies_64;
-	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-	todval = -1ULL;
-	/* Be careful about overflows. */
-	if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) {
-		timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY;
-		if (timer >= jiffies_timer_cc)
-			todval = timer;
-	}
-	set_clock_comparator(todval);
+	S390_lowcore.clock_comparator += delta;
+	set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
-/*
- * Start the HZ tick on the current CPU.
- * Only cpu_idle may call this function.
- */
-static void start_hz_timer(void)
+static int s390_next_event(unsigned long delta,
+			   struct clock_event_device *evt)
 {
-	if (!cpu_isset(smp_processor_id(), nohz_cpu_mask))
-		return;
-	account_ticks(get_clock());
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
-	cpu_clear(smp_processor_id(), nohz_cpu_mask);
+	S390_lowcore.clock_comparator = get_clock() + delta;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	return 0;
 }
 
-static int nohz_idle_notify(struct notifier_block *self,
-			    unsigned long action, void *hcpu)
+static void s390_set_mode(enum clock_event_mode mode,
+			  struct clock_event_device *evt)
 {
-	switch (action) {
-	case S390_CPU_IDLE:
-		stop_hz_timer();
-		break;
-	case S390_CPU_NOT_IDLE:
-		start_hz_timer();
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block nohz_idle_nb = {
-	.notifier_call = nohz_idle_notify,
-};
-
-static void __init nohz_init(void)
-{
-	if (register_idle_notifier(&nohz_idle_nb))
-		panic("Couldn't register idle notifier");
-}
-
-#endif
-
-/*
- * Set up per cpu jiffy timer and set the clock comparator.
- */
-static void setup_jiffy_timer(void)
-{
-	/* Set up clock comparator to next jiffy. */
-	S390_lowcore.jiffy_timer =
-		jiffies_timer_cc + (jiffies_64 + 1) * CLK_TICKS_PER_JIFFY;
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
 }
 
 /*
@@ -259,7 +138,26 @@
  */
 void init_cpu_timer(void)
 {
-	setup_jiffy_timer();
+	struct clock_event_device *cd;
+	int cpu;
+
+	S390_lowcore.clock_comparator = -1ULL;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+
+	cpu = smp_processor_id();
+	cd = &per_cpu(comparators, cpu);
+	cd->name		= "comparator";
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+	cd->mult		= 16777;
+	cd->shift		= 12;
+	cd->min_delta_ns	= 1;
+	cd->max_delta_ns	= LONG_MAX;
+	cd->rating		= 400;
+	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->set_next_event	= s390_next_event;
+	cd->set_mode		= s390_set_mode;
+
+	clockevents_register_device(cd);
 
 	/* Enable clock comparator timer interrupt. */
 	__ctl_set_bit(0,11);
@@ -270,8 +168,6 @@
 
 static void clock_comparator_interrupt(__u16 code)
 {
-	/* set clock comparator for next tick */
-	set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
 }
 
 static void etr_reset(void);
@@ -316,8 +212,9 @@
  */
 void __init time_init(void)
 {
+	u64 init_timer_cc;
+
 	init_timer_cc = reset_tod_clock();
-	xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY;
 	jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY;
 
 	/* set xtime */
@@ -342,10 +239,6 @@
 	/* Enable TOD clock interrupts on the boot cpu. */
 	init_cpu_timer();
 
-#ifdef CONFIG_NO_IDLE_HZ
-	nohz_init();
-#endif
-
 #ifdef CONFIG_VIRT_TIMER
 	vtime_init();
 #endif
@@ -699,53 +592,49 @@
 }
 
 /*
- * The time is "clock". xtime is what we think the time is.
+ * The time is "clock". old is what we think the time is.
  * Adjust the value by a multiple of jiffies and add the delta to ntp.
  * "delay" is an approximation how long the synchronization took. If
  * the time correction is positive, then "delay" is subtracted from
  * the time difference and only the remaining part is passed to ntp.
  */
-static void etr_adjust_time(unsigned long long clock, unsigned long long delay)
+static unsigned long long etr_adjust_time(unsigned long long old,
+					  unsigned long long clock,
+					  unsigned long long delay)
 {
 	unsigned long long delta, ticks;
 	struct timex adjust;
 
-	/*
-	 * We don't have to take the xtime lock because the cpu
-	 * executing etr_adjust_time is running disabled in
-	 * tasklet context and all other cpus are looping in
-	 * etr_sync_cpu_start.
-	 */
-	if (clock > xtime_cc) {
+	if (clock > old) {
 		/* It is later than we thought. */
-		delta = ticks = clock - xtime_cc;
+		delta = ticks = clock - old;
 		delta = ticks = (delta < delay) ? 0 : delta - delay;
 		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		init_timer_cc = init_timer_cc + delta;
-		jiffies_timer_cc = jiffies_timer_cc + delta;
-		xtime_cc = xtime_cc + delta;
 		adjust.offset = ticks * (1000000 / HZ);
 	} else {
 		/* It is earlier than we thought. */
-		delta = ticks = xtime_cc - clock;
+		delta = ticks = old - clock;
 		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		init_timer_cc = init_timer_cc - delta;
-		jiffies_timer_cc = jiffies_timer_cc - delta;
-		xtime_cc = xtime_cc - delta;
+		delta = -delta;
 		adjust.offset = -ticks * (1000000 / HZ);
 	}
+	jiffies_timer_cc += delta;
 	if (adjust.offset != 0) {
 		printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
 		       adjust.offset);
 		adjust.modes = ADJ_OFFSET_SINGLESHOT;
 		do_adjtimex(&adjust);
 	}
+	return delta;
 }
 
+static struct {
+	int in_sync;
+	unsigned long long fixup_cc;
+} etr_sync;
+
 static void etr_sync_cpu_start(void *dummy)
 {
-	int *in_sync = dummy;
-
 	etr_enable_sync_clock();
 	/*
 	 * This looks like a busy wait loop but it isn't. etr_sync_cpus
@@ -753,7 +642,7 @@
 	 * __udelay will stop the cpu on an enabled wait psw until the
 	 * TOD is running again.
 	 */
-	while (*in_sync == 0) {
+	while (etr_sync.in_sync == 0) {
 		__udelay(1);
 		/*
 		 * A different cpu changes *in_sync. Therefore use
@@ -761,14 +650,14 @@
 		 */
 		barrier();
 	}
-	if (*in_sync != 1)
+	if (etr_sync.in_sync != 1)
 		/* Didn't work. Clear per-cpu in sync bit again. */
 		etr_disable_sync_clock(NULL);
 	/*
 	 * This round of TOD syncing is done. Set the clock comparator
 	 * to the next tick and let the processor continue.
 	 */
-	setup_jiffy_timer();
+	fixup_clock_comparator(etr_sync.fixup_cc);
 }
 
 static void etr_sync_cpu_end(void *dummy)
@@ -783,8 +672,8 @@
 static int etr_sync_clock(struct etr_aib *aib, int port)
 {
 	struct etr_aib *sync_port;
-	unsigned long long clock, delay;
-	int in_sync, follows;
+	unsigned long long clock, old_clock, delay, delta;
+	int follows;
 	int rc;
 
 	/* Check if the current aib is adjacent to the sync port aib. */
@@ -799,9 +688,9 @@
 	 * successfully synced the clock. smp_call_function will
 	 * return after all other cpus are in etr_sync_cpu_start.
 	 */
-	in_sync = 0;
+	memset(&etr_sync, 0, sizeof(etr_sync));
 	preempt_disable();
-	smp_call_function(etr_sync_cpu_start,&in_sync,0,0);
+	smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
 	local_irq_disable();
 	etr_enable_sync_clock();
 
@@ -809,6 +698,7 @@
 	__ctl_set_bit(14, 21);
 	__ctl_set_bit(0, 29);
 	clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
+	old_clock = get_clock();
 	if (set_clock(clock) == 0) {
 		__udelay(1);	/* Wait for the clock to start. */
 		__ctl_clear_bit(0, 29);
@@ -817,16 +707,17 @@
 		/* Adjust Linux timing variables. */
 		delay = (unsigned long long)
 			(aib->edf2.etv - sync_port->edf2.etv) << 32;
-		etr_adjust_time(clock, delay);
-		setup_jiffy_timer();
+		delta = etr_adjust_time(old_clock, clock, delay);
+		etr_sync.fixup_cc = delta;
+		fixup_clock_comparator(delta);
 		/* Verify that the clock is properly set. */
 		if (!etr_aib_follows(sync_port, aib, port)) {
 			/* Didn't work. */
 			etr_disable_sync_clock(NULL);
-			in_sync = -EAGAIN;
+			etr_sync.in_sync = -EAGAIN;
 			rc = -EAGAIN;
 		} else {
-			in_sync = 1;
+			etr_sync.in_sync = 1;
 			rc = 0;
 		}
 	} else {
@@ -834,7 +725,7 @@
 		__ctl_clear_bit(0, 29);
 		__ctl_clear_bit(14, 21);
 		etr_disable_sync_clock(NULL);
-		in_sync = -EAGAIN;
+		etr_sync.in_sync = -EAGAIN;
 		rc = -EAGAIN;
 	}
 	local_irq_enable();
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
new file mode 100644
index 0000000..12b39b3
--- /dev/null
+++ b/arch/s390/kernel/topology.c
@@ -0,0 +1,314 @@
+/*
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/bootmem.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <asm/delay.h>
+#include <asm/s390_ext.h>
+#include <asm/sysinfo.h>
+
+#define CPU_BITS 64
+#define NR_MAG 6
+
+#define PTF_HORIZONTAL	(0UL)
+#define PTF_VERTICAL	(1UL)
+#define PTF_CHECK	(2UL)
+
+struct tl_cpu {
+	unsigned char reserved0[4];
+	unsigned char :6;
+	unsigned char pp:2;
+	unsigned char reserved1;
+	unsigned short origin;
+	unsigned long mask[CPU_BITS / BITS_PER_LONG];
+};
+
+struct tl_container {
+	unsigned char reserved[8];
+};
+
+union tl_entry {
+	unsigned char nl;
+	struct tl_cpu cpu;
+	struct tl_container container;
+};
+
+struct tl_info {
+	unsigned char reserved0[2];
+	unsigned short length;
+	unsigned char mag[NR_MAG];
+	unsigned char reserved1;
+	unsigned char mnest;
+	unsigned char reserved2[4];
+	union tl_entry tle[0];
+};
+
+struct core_info {
+	struct core_info *next;
+	cpumask_t mask;
+};
+
+static void topology_work_fn(struct work_struct *work);
+static struct tl_info *tl_info;
+static struct core_info core_info;
+static int machine_has_topology;
+static int machine_has_topology_irq;
+static struct timer_list topology_timer;
+static void set_topology_timer(void);
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+cpumask_t cpu_coregroup_map(unsigned int cpu)
+{
+	struct core_info *core = &core_info;
+	cpumask_t mask;
+
+	cpus_clear(mask);
+	if (!machine_has_topology)
+		return cpu_present_map;
+	mutex_lock(&smp_cpu_state_mutex);
+	while (core) {
+		if (cpu_isset(cpu, core->mask)) {
+			mask = core->mask;
+			break;
+		}
+		core = core->next;
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	if (cpus_empty(mask))
+		mask = cpumask_of_cpu(cpu);
+	return mask;
+}
+
+static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
+{
+	unsigned int cpu;
+
+	for (cpu = find_first_bit(&tl_cpu->mask[0], CPU_BITS);
+	     cpu < CPU_BITS;
+	     cpu = find_next_bit(&tl_cpu->mask[0], CPU_BITS, cpu + 1))
+	{
+		unsigned int rcpu, lcpu;
+
+		rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin;
+		for_each_present_cpu(lcpu) {
+			if (__cpu_logical_map[lcpu] == rcpu) {
+				cpu_set(lcpu, core->mask);
+				smp_cpu_polarization[lcpu] = tl_cpu->pp;
+			}
+		}
+	}
+}
+
+static void clear_cores(void)
+{
+	struct core_info *core = &core_info;
+
+	while (core) {
+		cpus_clear(core->mask);
+		core = core->next;
+	}
+}
+
+static union tl_entry *next_tle(union tl_entry *tle)
+{
+	if (tle->nl)
+		return (union tl_entry *)((struct tl_container *)tle + 1);
+	else
+		return (union tl_entry *)((struct tl_cpu *)tle + 1);
+}
+
+static void tl_to_cores(struct tl_info *info)
+{
+	union tl_entry *tle, *end;
+	struct core_info *core = &core_info;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	clear_cores();
+	tle = info->tle;
+	end = (union tl_entry *)((unsigned long)info + info->length);
+	while (tle < end) {
+		switch (tle->nl) {
+		case 5:
+		case 4:
+		case 3:
+		case 2:
+			break;
+		case 1:
+			core = core->next;
+			break;
+		case 0:
+			add_cpus_to_core(&tle->cpu, core);
+			break;
+		default:
+			clear_cores();
+			machine_has_topology = 0;
+			return;
+		}
+		tle = next_tle(tle);
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static void topology_update_polarization_simple(void)
+{
+	int cpu;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	for_each_present_cpu(cpu)
+		smp_cpu_polarization[cpu] = POLARIZATION_HRZ;
+	mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static int ptf(unsigned long fc)
+{
+	int rc;
+
+	asm volatile(
+		"	.insn	rre,0xb9a20000,%1,%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (rc)
+		: "d" (fc)  : "cc");
+	return rc;
+}
+
+int topology_set_cpu_management(int fc)
+{
+	int cpu;
+	int rc;
+
+	if (!machine_has_topology)
+		return -EOPNOTSUPP;
+	if (fc)
+		rc = ptf(PTF_VERTICAL);
+	else
+		rc = ptf(PTF_HORIZONTAL);
+	if (rc)
+		return -EBUSY;
+	for_each_present_cpu(cpu)
+		smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+	return rc;
+}
+
+void arch_update_cpu_topology(void)
+{
+	struct tl_info *info = tl_info;
+	struct sys_device *sysdev;
+	int cpu;
+
+	if (!machine_has_topology) {
+		topology_update_polarization_simple();
+		return;
+	}
+	stsi(info, 15, 1, 2);
+	tl_to_cores(info);
+	for_each_online_cpu(cpu) {
+		sysdev = get_cpu_sysdev(cpu);
+		kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+	}
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+	arch_reinit_sched_domains();
+}
+
+void topology_schedule_update(void)
+{
+	schedule_work(&topology_work);
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+	if (ptf(PTF_CHECK))
+		topology_schedule_update();
+	set_topology_timer();
+}
+
+static void set_topology_timer(void)
+{
+	topology_timer.function = topology_timer_fn;
+	topology_timer.data = 0;
+	topology_timer.expires = jiffies + 60 * HZ;
+	add_timer(&topology_timer);
+}
+
+static void topology_interrupt(__u16 code)
+{
+	schedule_work(&topology_work);
+}
+
+static int __init init_topology_update(void)
+{
+	int rc;
+
+	if (!machine_has_topology) {
+		topology_update_polarization_simple();
+		return 0;
+	}
+	init_timer_deferrable(&topology_timer);
+	if (machine_has_topology_irq) {
+		rc = register_external_interrupt(0x2005, topology_interrupt);
+		if (rc)
+			return rc;
+		ctl_set_bit(0, 8);
+	}
+	else
+		set_topology_timer();
+	return 0;
+}
+__initcall(init_topology_update);
+
+void __init s390_init_cpu_topology(void)
+{
+	unsigned long long facility_bits;
+	struct tl_info *info;
+	struct core_info *core;
+	int nr_cores;
+	int i;
+
+	if (stfle(&facility_bits, 1) <= 0)
+		return;
+	if (!(facility_bits & (1ULL << 52)) || !(facility_bits & (1ULL << 61)))
+		return;
+	machine_has_topology = 1;
+
+	if (facility_bits & (1ULL << 51))
+		machine_has_topology_irq = 1;
+
+	tl_info = alloc_bootmem_pages(PAGE_SIZE);
+	if (!tl_info)
+		goto error;
+	info = tl_info;
+	stsi(info, 15, 1, 2);
+
+	nr_cores = info->mag[NR_MAG - 2];
+	for (i = 0; i < info->mnest - 2; i++)
+		nr_cores *= info->mag[NR_MAG - 3 - i];
+
+	printk(KERN_INFO "CPU topology:");
+	for (i = 0; i < NR_MAG; i++)
+		printk(" %d", info->mag[i]);
+	printk(" / %d\n", info->mnest);
+
+	core = &core_info;
+	for (i = 0; i < nr_cores; i++) {
+		core->next = alloc_bootmem(sizeof(struct core_info));
+		core = core->next;
+		if (!core)
+			goto error;
+	}
+	return;
+error:
+	machine_has_topology = 0;
+	machine_has_topology_irq = 0;
+}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 60f728a..57b607b 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -42,11 +42,8 @@
 #include <asm/s390_ext.h>
 #include <asm/lowcore.h>
 #include <asm/debug.h>
+#include "entry.h"
 
-/* Called from entry.S only */
-extern void handle_per_exception(struct pt_regs *regs);
-
-typedef void pgm_check_handler_t(struct pt_regs *, long);
 pgm_check_handler_t *pgm_check_table[128];
 
 #ifdef CONFIG_SYSCTL
@@ -59,7 +56,6 @@
 
 extern pgm_check_handler_t do_protection_exception;
 extern pgm_check_handler_t do_dat_exception;
-extern pgm_check_handler_t do_monitor_call;
 extern pgm_check_handler_t do_asce_exception;
 
 #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
@@ -138,7 +134,6 @@
 	else
 		__show_trace(sp, S390_lowcore.thread_info,
 			     S390_lowcore.thread_info + THREAD_SIZE);
-	printk("\n");
 	if (!task)
 		task = current;
 	debug_show_held_locks(task);
@@ -166,6 +161,15 @@
 	show_trace(task, sp);
 }
 
+#ifdef CONFIG_64BIT
+void show_last_breaking_event(struct pt_regs *regs)
+{
+	printk("Last Breaking-Event-Address:\n");
+	printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN);
+	print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN);
+}
+#endif
+
 /*
  * The architecture-independent dump_stack generator
  */
@@ -739,6 +743,5 @@
         pgm_check_table[0x15] = &operand_exception;
         pgm_check_table[0x1C] = &space_switch_exception;
         pgm_check_table[0x1D] = &hfp_sqrt_exception;
-	pgm_check_table[0x40] = &do_monitor_call;
 	pfault_irq_init();
 }
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 70f2a86..eae21a8 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -34,7 +34,7 @@
  */
 void __udelay(unsigned long usecs)
 {
-	u64 end, time, jiffy_timer = 0;
+	u64 end, time, old_cc = 0;
 	unsigned long flags, cr0, mask, dummy;
 	int irq_context;
 
@@ -43,8 +43,8 @@
 		local_bh_disable();
 	local_irq_save(flags);
 	if (raw_irqs_disabled_flags(flags)) {
-		jiffy_timer = S390_lowcore.jiffy_timer;
-		S390_lowcore.jiffy_timer = -1ULL - (4096 << 12);
+		old_cc = S390_lowcore.clock_comparator;
+		S390_lowcore.clock_comparator = -1ULL;
 		__ctl_store(cr0, 0, 0);
 		dummy = (cr0 & 0xffff00e0) | 0x00000800;
 		__ctl_load(dummy , 0, 0);
@@ -55,8 +55,8 @@
 
 	end = get_clock() + ((u64) usecs << 12);
 	do {
-		time = end < S390_lowcore.jiffy_timer ?
-			end : S390_lowcore.jiffy_timer;
+		time = end < S390_lowcore.clock_comparator ?
+			end : S390_lowcore.clock_comparator;
 		set_clock_comparator(time);
 		trace_hardirqs_on();
 		__load_psw_mask(mask);
@@ -65,10 +65,10 @@
 
 	if (raw_irqs_disabled_flags(flags)) {
 		__ctl_load(cr0, 0, 0);
-		S390_lowcore.jiffy_timer = jiffy_timer;
+		S390_lowcore.clock_comparator = old_cc;
 	}
 	if (!irq_context)
 		_local_bh_enable();
-	set_clock_comparator(S390_lowcore.jiffy_timer);
+	set_clock_comparator(S390_lowcore.clock_comparator);
 	local_irq_restore(flags);
 }
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 5efdfe9..d66215b 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,6 +302,10 @@
 	pte_t *pte_from, *pte_to;
 	int write_user;
 
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		memcpy((void __force *) to, (void __force *) from, n);
+		return 0;
+	}
 	done = 0;
 retry:
 	spin_lock(&mm->page_table_lock);
@@ -361,18 +365,10 @@
 		     : "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
 		       "m" (*uaddr) : "cc" );
 
-int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
 {
 	int oldval = 0, newval, ret;
 
-	spin_lock(&current->mm->page_table_lock);
-	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
-	if (!uaddr) {
-		spin_unlock(&current->mm->page_table_lock);
-		return -EFAULT;
-	}
-	get_page(virt_to_page(uaddr));
-	spin_unlock(&current->mm->page_table_lock);
 	switch (op) {
 	case FUTEX_OP_SET:
 		__futex_atomic_op("lr %2,%5\n",
@@ -397,17 +393,17 @@
 	default:
 		ret = -ENOSYS;
 	}
-	put_page(virt_to_page(uaddr));
-	*old = oldval;
+	if (ret == 0)
+		*old = oldval;
 	return ret;
 }
 
-int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
 {
 	int ret;
 
-	if (!current->mm)
-		return -EFAULT;
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return __futex_atomic_op_pt(op, uaddr, oparg, old);
 	spin_lock(&current->mm->page_table_lock);
 	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
 	if (!uaddr) {
@@ -416,13 +412,40 @@
 	}
 	get_page(virt_to_page(uaddr));
 	spin_unlock(&current->mm->page_table_lock);
-	asm volatile("   cs   %1,%4,0(%5)\n"
-		     "0: lr   %0,%1\n"
-		     "1:\n"
-		     EX_TABLE(0b,1b)
+	ret = __futex_atomic_op_pt(op, uaddr, oparg, old);
+	put_page(virt_to_page(uaddr));
+	return ret;
+}
+
+static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+{
+	int ret;
+
+	asm volatile("0: cs   %1,%4,0(%5)\n"
+		     "1: lr   %0,%1\n"
+		     "2:\n"
+		     EX_TABLE(0b,2b) EX_TABLE(1b,2b)
 		     : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
 		     : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
 		     : "cc", "memory" );
+	return ret;
+}
+
+int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+{
+	int ret;
+
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+	spin_lock(&current->mm->page_table_lock);
+	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
+	if (!uaddr) {
+		spin_unlock(&current->mm->page_table_lock);
+		return -EFAULT;
+	}
+	get_page(virt_to_page(uaddr));
+	spin_unlock(&current->mm->page_table_lock);
+	ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
 	put_page(virt_to_page(uaddr));
 	return ret;
 }
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 880b0eb..ed2af0a 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -289,22 +289,8 @@
 
 	rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
 
-	switch (rc) {
-	case 0:
-		break;
-	case -ENOSPC:
-		PRINT_WARN("segment_load: not loading segment %s - overlaps "
-			   "storage/segment\n", name);
+	if (rc)
 		goto out_free;
-	case -ERANGE:
-		PRINT_WARN("segment_load: not loading segment %s - exceeds "
-			   "kernel mapping range\n", name);
-		goto out_free;
-	default:
-		PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
-			   name, rc);
-		goto out_free;
-	}
 
 	seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 	if (seg->res == NULL) {
@@ -582,8 +568,59 @@
 	mutex_unlock(&dcss_lock);
 }
 
+/*
+ * print appropriate error message for segment_load()/segment_type()
+ * return code
+ */
+void segment_warning(int rc, char *seg_name)
+{
+	switch (rc) {
+	case -ENOENT:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "does not exist\n", seg_name);
+		break;
+	case -ENOSYS:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "not running on VM\n", seg_name);
+		break;
+	case -EIO:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "hardware error\n", seg_name);
+		break;
+	case -ENOTSUPP:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "is a multi-part segment\n", seg_name);
+		break;
+	case -ENOSPC:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "overlaps with storage\n", seg_name);
+		break;
+	case -EBUSY:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "overlaps with already loaded dcss\n", seg_name);
+		break;
+	case -EPERM:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "already loaded in incompatible mode\n", seg_name);
+		break;
+	case -ENOMEM:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "out of memory\n", seg_name);
+		break;
+	case -ERANGE:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "exceeds kernel mapping range\n", seg_name);
+		break;
+	default:
+		PRINT_WARN("cannot load/query segment %s, "
+			   "return value %i\n", seg_name, rc);
+		break;
+	}
+}
+
 EXPORT_SYMBOL(segment_load);
 EXPORT_SYMBOL(segment_unload);
 EXPORT_SYMBOL(segment_save);
 EXPORT_SYMBOL(segment_type);
 EXPORT_SYMBOL(segment_modify_shared);
+EXPORT_SYMBOL(segment_warning);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ed13d42..2650f46 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -28,11 +28,11 @@
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/s390_ext.h>
 #include <asm/mmu_context.h>
+#include "../kernel/entry.h"
 
 #ifndef CONFIG_64BIT
 #define __FAIL_ADDR_MASK 0x7ffff000
@@ -50,8 +50,6 @@
 extern int sysctl_userprocess_debug;
 #endif
 
-extern void die(const char *,struct pt_regs *,long);
-
 #ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs, long err)
 {
@@ -245,11 +243,6 @@
 }
 
 #ifdef CONFIG_S390_EXEC_PROTECT
-extern long sys_sigreturn(struct pt_regs *regs);
-extern long sys_rt_sigreturn(struct pt_regs *regs);
-extern long sys32_sigreturn(struct pt_regs *regs);
-extern long sys32_rt_sigreturn(struct pt_regs *regs);
-
 static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
 			 unsigned long address, unsigned long error_code)
 {
@@ -270,15 +263,15 @@
 #ifdef CONFIG_COMPAT
 	compat = test_tsk_thread_flag(current, TIF_31BIT);
 	if (compat && instruction == 0x0a77)
-		sys32_sigreturn(regs);
+		sys32_sigreturn();
 	else if (compat && instruction == 0x0aad)
-		sys32_rt_sigreturn(regs);
+		sys32_rt_sigreturn();
 	else
 #endif
 	if (instruction == 0x0a77)
-		sys_sigreturn(regs);
+		sys_sigreturn();
 	else if (instruction == 0x0aad)
-		sys_rt_sigreturn(regs);
+		sys_rt_sigreturn();
 	else {
 		current->thread.prot_addr = address;
 		current->thread.trap_no = error_code;
@@ -424,7 +417,7 @@
 }
 
 void __kprobes do_protection_exception(struct pt_regs *regs,
-				       unsigned long error_code)
+				       long error_code)
 {
 	/* Protection exception is supressing, decrement psw address. */
 	regs->psw.addr -= (error_code >> 16);
@@ -440,7 +433,7 @@
 	do_exception(regs, 4, 1);
 }
 
-void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
+void __kprobes do_dat_exception(struct pt_regs *regs, long error_code)
 {
 	do_exception(regs, error_code & 0xff, 0);
 }
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8053245..202c952 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -50,7 +50,6 @@
 
 	printk("Mem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	i = max_mapnr;
 	while (i-- > 0) {
 		if (!pfn_valid(i))
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 62bf373..4bbdce3 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -5,7 +5,7 @@
 extra-y	:= head_32.o init_task.o vmlinux.lds
 
 obj-y	:= debugtraps.o io.o io_generic.o irq.o machvec.o process_32.o \
-	   ptrace_32.o semaphore.o setup.o signal_32.o sys_sh.o sys_sh32.o \
+	   ptrace_32.o setup.o signal_32.o sys_sh.o sys_sh32.o \
 	   syscalls_32.o time_32.o topology.o traps.o traps_32.o
 
 obj-y				+= cpu/ timers/
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index e01283d..6edf53b 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -1,7 +1,7 @@
 extra-y	:= head_64.o init_task.o vmlinux.lds
 
 obj-y	:= debugtraps.o io.o io_generic.o irq.o machvec.o process_64.o \
-	   ptrace_64.o semaphore.o setup.o signal_64.o sys_sh.o sys_sh64.o \
+	   ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
 	   syscalls_64.o time_64.o topology.o traps.o traps_64.o
 
 obj-y				+= cpu/ timers/
diff --git a/arch/sh/kernel/semaphore.c b/arch/sh/kernel/semaphore.c
deleted file mode 100644
index 184119e..0000000
--- a/arch/sh/kernel/semaphore.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Just taken from alpha implementation.
- * This can't work well, perhaps.
- */
-/*
- *  Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/init.h>
-#include <asm/semaphore.h>
-#include <asm/semaphore-helper.h>
-
-DEFINE_SPINLOCK(semaphore_wake_lock);
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-#define DOWN_VAR				\
-	struct task_struct *tsk = current;	\
-	wait_queue_t wait;			\
-	init_waitqueue_entry(&wait, tsk);
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	tsk->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		tsk->state = (task_state);	\
-	}					\
-	tsk->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DOWN_VAR
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-	DOWN_VAR
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, tsk);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 45bb333..6d40546 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -9,7 +9,6 @@
 #include <linux/pci.h>
 #include <linux/irq.h>
 #include <asm/sections.h>
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
@@ -48,12 +47,6 @@
 EXPORT_SYMBOL(get_vm_area);
 #endif
 
-/* semaphore exports */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-
 EXPORT_SYMBOL(__udelay);
 EXPORT_SYMBOL(__ndelay);
 EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c
index b6410ce..a310c97 100644
--- a/arch/sh/kernel/sh_ksyms_64.c
+++ b/arch/sh/kernel/sh_ksyms_64.c
@@ -16,7 +16,6 @@
 #include <linux/in6.h>
 #include <linux/interrupt.h>
 #include <linux/screen_info.h>
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
@@ -37,9 +36,6 @@
 EXPORT_SYMBOL(screen_info);
 #endif
 
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
 EXPORT_SYMBOL(__put_user_asm_l);
 EXPORT_SYMBOL(__get_user_asm_l);
 EXPORT_SYMBOL(copy_page);
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index bf1b15d..2712bb1 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -12,7 +12,7 @@
 	    sys_sparc.o sunos_asm.o systbls.o \
 	    time.o windows.o cpu.o devices.o sclow.o \
 	    tadpole.o tick14.o ptrace.o sys_solaris.o \
-	    unaligned.o una_asm.o muldiv.o semaphore.o \
+	    unaligned.o una_asm.o muldiv.o \
 	    prom.o of_device.o devres.o
 
 devres-y = ../../../kernel/irq/devres.o
diff --git a/arch/sparc/kernel/semaphore.c b/arch/sparc/kernel/semaphore.c
deleted file mode 100644
index 0c37c1a..0000000
--- a/arch/sparc/kernel/semaphore.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/* $Id: semaphore.c,v 1.7 2001/04/18 21:06:05 davem Exp $ */
-
-/* sparc32 semaphore implementation, based on i386 version */
-
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is
- * protected by the semaphore spinlock.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-static DEFINE_SPINLOCK(semaphore_lock);
-
-void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	spin_lock_irq(&semaphore_lock);
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock.
-		 */
-		if (!atomic24_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irq(&semaphore_lock);
-
-		schedule();
-		tsk->state = TASK_UNINTERRUPTIBLE;
-		spin_lock_irq(&semaphore_lock);
-	}
-	spin_unlock_irq(&semaphore_lock);
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-	wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	tsk->state = TASK_INTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	spin_lock_irq(&semaphore_lock);
-	sem->sleepers ++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic24_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock. The
-		 * "-1" is because we're still hoping to get
-		 * the lock.
-		 */
-		if (!atomic24_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irq(&semaphore_lock);
-
-		schedule();
-		tsk->state = TASK_INTERRUPTIBLE;
-		spin_lock_irq(&semaphore_lock);
-	}
-	spin_unlock_irq(&semaphore_lock);
-	tsk->state = TASK_RUNNING;
-	remove_wait_queue(&sem->wait, &wait);
-	wake_up(&sem->wait);
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- */
-int __down_trylock(struct semaphore * sem)
-{
-	int sleepers;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock.
-	 */
-	if (!atomic24_add_negative(sleepers, &sem->count))
-		wake_up(&sem->wait);
-
-	spin_unlock_irqrestore(&semaphore_lock, flags);
-	return 1;
-}
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
index c1025e5..97b1de0 100644
--- a/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -107,11 +107,6 @@
 EXPORT_SYMBOL(___rw_read_exit);
 EXPORT_SYMBOL(___rw_write_enter);
 #endif
-/* semaphores */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__down_interruptible);
 
 EXPORT_SYMBOL(sparc_valid_addr_bitmap);
 EXPORT_SYMBOL(phys_base);
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index 1bf5b18..459462e 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -10,7 +10,7 @@
 obj-y		:= process.o setup.o cpu.o idprom.o \
 		   traps.o auxio.o una_asm.o sysfs.o iommu.o \
 		   irq.o ptrace.o time.o sys_sparc.o signal.o \
-		   unaligned.o central.o pci.o starfire.o semaphore.o \
+		   unaligned.o central.o pci.o starfire.o \
 		   power.o sbus.o sparc64_ksyms.o chmc.o \
 		   visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
 
diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c
deleted file mode 100644
index 9974a68..0000000
--- a/arch/sparc64/kernel/semaphore.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/* semaphore.c: Sparc64 semaphore implementation.
- *
- * This is basically the PPC semaphore scheme ported to use
- * the sparc64 atomic instructions, so see the PPC code for
- * credits.
- */
-
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- *	old_count = sem->count;
- *	tmp = MAX(old_count, 0) + incr;
- *	sem->count = tmp;
- *	return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	int old_count, tmp;
-
-	__asm__ __volatile__("\n"
-"	! __sem_update_count old_count(%0) tmp(%1) incr(%4) &sem->count(%3)\n"
-"1:	ldsw	[%3], %0\n"
-"	mov	%0, %1\n"
-"	cmp	%0, 0\n"
-"	movl	%%icc, 0, %1\n"
-"	add	%1, %4, %1\n"
-"	cas	[%3], %0, %1\n"
-"	cmp	%0, %1\n"
-"	membar	#StoreLoad | #StoreStore\n"
-"	bne,pn	%%icc, 1b\n"
-"	 nop\n"
-	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
-	: "r" (&sem->count), "r" (incr), "m" (sem->count)
-	: "cc");
-
-	return old_count;
-}
-
-static void __up(struct semaphore *sem)
-{
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-
-void up(struct semaphore *sem)
-{
-	/* This atomically does:
-	 * 	old_val = sem->count;
-	 *	new_val = sem->count + 1;
-	 *	sem->count = new_val;
-	 *	if (old_val < 0)
-	 *		__up(sem);
-	 *
-	 * The (old_val < 0) test is equivalent to
-	 * the more straightforward (new_val <= 0),
-	 * but it is easier to test the former because
-	 * of how the CAS instruction works.
-	 */
-
-	__asm__ __volatile__("\n"
-"	! up sem(%0)\n"
-"	membar	#StoreLoad | #LoadLoad\n"
-"1:	lduw	[%0], %%g1\n"
-"	add	%%g1, 1, %%g7\n"
-"	cas	[%0], %%g1, %%g7\n"
-"	cmp	%%g1, %%g7\n"
-"	bne,pn	%%icc, 1b\n"
-"	 addcc	%%g7, 1, %%g0\n"
-"	membar	#StoreLoad | #StoreStore\n"
-"	ble,pn	%%icc, 3f\n"
-"	 nop\n"
-"2:\n"
-"	.subsection 2\n"
-"3:	mov	%0, %%g1\n"
-"	save	%%sp, -160, %%sp\n"
-"	call	%1\n"
-"	 mov	%%g1, %%o0\n"
-"	ba,pt	%%xcc, 2b\n"
-"	 restore\n"
-"	.previous\n"
-	: : "r" (sem), "i" (__up)
-	: "g1", "g2", "g3", "g7", "memory", "cc");
-}
-
-static void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-
-	wake_up(&sem->wait);
-}
-
-void __sched down(struct semaphore *sem)
-{
-	might_sleep();
-	/* This atomically does:
-	 * 	old_val = sem->count;
-	 *	new_val = sem->count - 1;
-	 *	sem->count = new_val;
-	 *	if (old_val < 1)
-	 *		__down(sem);
-	 *
-	 * The (old_val < 1) test is equivalent to
-	 * the more straightforward (new_val < 0),
-	 * but it is easier to test the former because
-	 * of how the CAS instruction works.
-	 */
-
-	__asm__ __volatile__("\n"
-"	! down sem(%0)\n"
-"1:	lduw	[%0], %%g1\n"
-"	sub	%%g1, 1, %%g7\n"
-"	cas	[%0], %%g1, %%g7\n"
-"	cmp	%%g1, %%g7\n"
-"	bne,pn	%%icc, 1b\n"
-"	 cmp	%%g7, 1\n"
-"	membar	#StoreLoad | #StoreStore\n"
-"	bl,pn	%%icc, 3f\n"
-"	 nop\n"
-"2:\n"
-"	.subsection 2\n"
-"3:	mov	%0, %%g1\n"
-"	save	%%sp, -160, %%sp\n"
-"	call	%1\n"
-"	 mov	%%g1, %%o0\n"
-"	ba,pt	%%xcc, 2b\n"
-"	 restore\n"
-"	.previous\n"
-	: : "r" (sem), "i" (__down)
-	: "g1", "g2", "g3", "g7", "memory", "cc");
-}
-
-int down_trylock(struct semaphore *sem)
-{
-	int ret;
-
-	/* This atomically does:
-	 * 	old_val = sem->count;
-	 *	new_val = sem->count - 1;
-	 *	if (old_val < 1) {
-	 *		ret = 1;
-	 *	} else {
-	 *		sem->count = new_val;
-	 *		ret = 0;
-	 *	}
-	 *
-	 * The (old_val < 1) test is equivalent to
-	 * the more straightforward (new_val < 0),
-	 * but it is easier to test the former because
-	 * of how the CAS instruction works.
-	 */
-
-	__asm__ __volatile__("\n"
-"	! down_trylock sem(%1) ret(%0)\n"
-"1:	lduw	[%1], %%g1\n"
-"	sub	%%g1, 1, %%g7\n"
-"	cmp	%%g1, 1\n"
-"	bl,pn	%%icc, 2f\n"
-"	 mov	1, %0\n"
-"	cas	[%1], %%g1, %%g7\n"
-"	cmp	%%g1, %%g7\n"
-"	bne,pn	%%icc, 1b\n"
-"	 mov	0, %0\n"
-"	membar	#StoreLoad | #StoreStore\n"
-"2:\n"
-	: "=&r" (ret)
-	: "r" (sem)
-	: "g1", "g7", "memory", "cc");
-
-	return ret;
-}
-
-static int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			__sem_update_count(sem, 0);
-			retval = -EINTR;
-			break;
-		}
-		schedule();
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	tsk->state = TASK_RUNNING;
-	remove_wait_queue(&sem->wait, &wait);
-	wake_up(&sem->wait);
-	return retval;
-}
-
-int __sched down_interruptible(struct semaphore *sem)
-{
-	int ret = 0;
-	
-	might_sleep();
-	/* This atomically does:
-	 * 	old_val = sem->count;
-	 *	new_val = sem->count - 1;
-	 *	sem->count = new_val;
-	 *	if (old_val < 1)
-	 *		ret = __down_interruptible(sem);
-	 *
-	 * The (old_val < 1) test is equivalent to
-	 * the more straightforward (new_val < 0),
-	 * but it is easier to test the former because
-	 * of how the CAS instruction works.
-	 */
-
-	__asm__ __volatile__("\n"
-"	! down_interruptible sem(%2) ret(%0)\n"
-"1:	lduw	[%2], %%g1\n"
-"	sub	%%g1, 1, %%g7\n"
-"	cas	[%2], %%g1, %%g7\n"
-"	cmp	%%g1, %%g7\n"
-"	bne,pn	%%icc, 1b\n"
-"	 cmp	%%g7, 1\n"
-"	membar	#StoreLoad | #StoreStore\n"
-"	bl,pn	%%icc, 3f\n"
-"	 nop\n"
-"2:\n"
-"	.subsection 2\n"
-"3:	mov	%2, %%g1\n"
-"	save	%%sp, -160, %%sp\n"
-"	call	%3\n"
-"	 mov	%%g1, %%o0\n"
-"	ba,pt	%%xcc, 2b\n"
-"	 restore\n"
-"	.previous\n"
-	: "=r" (ret)
-	: "0" (ret), "r" (sem), "i" (__down_interruptible)
-	: "g1", "g2", "g3", "g7", "memory", "cc");
-	return ret;
-}
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 51fa773..051b8d9 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -130,12 +130,6 @@
 
 EXPORT_SYMBOL(sparc64_get_clock_tick);
 
-/* semaphores */
-EXPORT_SYMBOL(down);
-EXPORT_SYMBOL(down_trylock);
-EXPORT_SYMBOL(down_interruptible);
-EXPORT_SYMBOL(up);
-
 /* RW semaphores */
 EXPORT_SYMBOL(__down_read);
 EXPORT_SYMBOL(__down_read_trylock);
diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386
index 3cd8a04..e09edfa 100644
--- a/arch/um/Kconfig.i386
+++ b/arch/um/Kconfig.i386
@@ -19,10 +19,6 @@
 	bool
 	default n
 
-config SEMAPHORE_SLEEPERS
-	bool
-	default y
-
 config 3_LEVEL_PGTABLES
 	bool "Three-level pagetables (EXPERIMENTAL)"
 	default n
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index 6533b34..3fbe69e 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -11,10 +11,6 @@
 	bool
 	default y
 
-config SEMAPHORE_SLEEPERS
-	bool
-	default y
-
 config 3_LEVEL_PGTABLES
        bool
        default y
diff --git a/arch/um/sys-i386/ksyms.c b/arch/um/sys-i386/ksyms.c
index 2a1eac1..bfbefd3 100644
--- a/arch/um/sys-i386/ksyms.c
+++ b/arch/um/sys-i386/ksyms.c
@@ -1,17 +1,5 @@
 #include "linux/module.h"
-#include "linux/in6.h"
-#include "linux/rwsem.h"
-#include "asm/byteorder.h"
-#include "asm/delay.h"
-#include "asm/semaphore.h"
-#include "asm/uaccess.h"
 #include "asm/checksum.h"
-#include "asm/errno.h"
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
 
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_partial);
diff --git a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile
index 0890152..b8bc844 100644
--- a/arch/um/sys-ppc/Makefile
+++ b/arch/um/sys-ppc/Makefile
@@ -3,7 +3,7 @@
 .S.o:
 	$(CC) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
 
-OBJS = ptrace.o sigcontext.o semaphore.o checksum.o miscthings.o misc.o \
+OBJS = ptrace.o sigcontext.o checksum.o miscthings.o misc.o \
 	ptrace_user.o sysrq.o
 
 EXTRA_AFLAGS := -DCONFIG_PPC32 -I. -I$(srctree)/arch/ppc/kernel
@@ -20,10 +20,6 @@
 sigcontext.o: sigcontext.c
 	$(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<
 
-semaphore.c:
-	rm -f $@
-	ln -s $(srctree)/arch/ppc/kernel/$@ $@
-
 checksum.S:
 	rm -f $@
 	ln -s $(srctree)/arch/ppc/lib/$@ $@
@@ -66,4 +62,4 @@
 	$(CC) $(EXTRA_AFLAGS) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
 	rm -f asm
 
-clean-files := $(OBJS) ppc_defs.h checksum.S semaphore.c mk_defs.c
+clean-files := $(OBJS) ppc_defs.h checksum.S mk_defs.c
diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/um/sys-x86_64/ksyms.c
index 12c5936..4d7d1a8 100644
--- a/arch/um/sys-x86_64/ksyms.c
+++ b/arch/um/sys-x86_64/ksyms.c
@@ -1,16 +1,5 @@
 #include "linux/module.h"
-#include "linux/in6.h"
-#include "linux/rwsem.h"
-#include "asm/byteorder.h"
-#include "asm/semaphore.h"
-#include "asm/uaccess.h"
-#include "asm/checksum.h"
-#include "asm/errno.h"
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
+#include "asm/string.h"
 
 /*XXX: we need them because they would be exported by x86_64 */
 EXPORT_SYMBOL(__memcpy);
diff --git a/arch/v850/kernel/Makefile b/arch/v850/kernel/Makefile
index 3930482..da5889c 100644
--- a/arch/v850/kernel/Makefile
+++ b/arch/v850/kernel/Makefile
@@ -11,7 +11,7 @@
 
 extra-y := head.o init_task.o vmlinux.lds
 
-obj-y += intv.o entry.o process.o syscalls.o time.o semaphore.o setup.o \
+obj-y += intv.o entry.o process.o syscalls.o time.o setup.o \
 	 signal.o irq.o mach.o ptrace.o bug.o
 obj-$(CONFIG_MODULES)		+= module.o v850_ksyms.o
 # chip-specific code
diff --git a/arch/v850/kernel/semaphore.c b/arch/v850/kernel/semaphore.c
deleted file mode 100644
index fc89fd6..0000000
--- a/arch/v850/kernel/semaphore.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * arch/v850/kernel/semaphore.c -- Semaphore support
- *
- *  Copyright (C) 1998-2000  IBM Corporation
- *  Copyright (C) 1999  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file is a copy of the s390 version, arch/s390/kernel/semaphore.c
- *    Author(s): Martin Schwidefsky
- * which was derived from the i386 version, linux/arch/i386/kernel/semaphore.c
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is
- * protected by the semaphore spinlock.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-static DEFINE_SPINLOCK(semaphore_lock);
-
-void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	spin_lock_irq(&semaphore_lock);
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irq(&semaphore_lock);
-
-		schedule();
-		tsk->state = TASK_UNINTERRUPTIBLE;
-		spin_lock_irq(&semaphore_lock);
-	}
-	spin_unlock_irq(&semaphore_lock);
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-	wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	tsk->state = TASK_INTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	spin_lock_irq(&semaphore_lock);
-	sem->sleepers ++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock. The
-		 * "-1" is because we're still hoping to get
-		 * the lock.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irq(&semaphore_lock);
-
-		schedule();
-		tsk->state = TASK_INTERRUPTIBLE;
-		spin_lock_irq(&semaphore_lock);
-	}
-	spin_unlock_irq(&semaphore_lock);
-	tsk->state = TASK_RUNNING;
-	remove_wait_queue(&sem->wait, &wait);
-	wake_up(&sem->wait);
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- */
-int __down_trylock(struct semaphore * sem)
-{
-        unsigned long flags;
-	int sleepers;
-
-	spin_lock_irqsave(&semaphore_lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count))
-		wake_up(&sem->wait);
-
-	spin_unlock_irqrestore(&semaphore_lock, flags);
-	return 1;
-}
diff --git a/arch/v850/kernel/v850_ksyms.c b/arch/v850/kernel/v850_ksyms.c
index 93575fd..8d386a5 100644
--- a/arch/v850/kernel/v850_ksyms.c
+++ b/arch/v850/kernel/v850_ksyms.c
@@ -11,7 +11,6 @@
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/current.h>
 
@@ -34,12 +33,6 @@
 EXPORT_SYMBOL (memcpy);
 EXPORT_SYMBOL (memmove);
 
-/* semaphores */
-EXPORT_SYMBOL (__down);
-EXPORT_SYMBOL (__down_interruptible);
-EXPORT_SYMBOL (__down_trylock);
-EXPORT_SYMBOL (__up);
-
 /*
  * libgcc functions - functions that are used internally by the
  * compiler...  (prototypes are not correct though, but that
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4e32b6f..701c4a2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -53,9 +53,6 @@
 config HAVE_LATENCYTOP_SUPPORT
 	def_bool y
 
-config SEMAPHORE_SLEEPERS
-	def_bool y
-
 config FAST_CMPXCHG_LOCAL
 	bool
 	default y
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 0616278..deb4378 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,13 +1,8 @@
 #include <linux/module.h>
-#include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_partial_copy_generic);
 
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index e63d968..58882f9 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -4,7 +4,6 @@
 #include <linux/module.h>
 #include <linux/smp.h>
 
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -12,11 +11,6 @@
 
 EXPORT_SYMBOL(kernel_thread);
 
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_4);
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 3899bd3..648fe47 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -30,89 +30,6 @@
  * value or just clobbered..
  */
 	.section .sched.text, "ax"
-ENTRY(__down_failed)
-	CFI_STARTPROC
-	FRAME
-	pushl %edx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET edx,0
-	pushl %ecx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ecx,0
-	call __down
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
-	CFI_RESTORE ecx
-	popl %edx
-	CFI_ADJUST_CFA_OFFSET -4
-	CFI_RESTORE edx
-	ENDFRAME
-	ret
-	CFI_ENDPROC
-	ENDPROC(__down_failed)
-
-ENTRY(__down_failed_interruptible)
-	CFI_STARTPROC
-	FRAME
-	pushl %edx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET edx,0
-	pushl %ecx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ecx,0
-	call __down_interruptible
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
-	CFI_RESTORE ecx
-	popl %edx
-	CFI_ADJUST_CFA_OFFSET -4
-	CFI_RESTORE edx
-	ENDFRAME
-	ret
-	CFI_ENDPROC
-	ENDPROC(__down_failed_interruptible)
-
-ENTRY(__down_failed_trylock)
-	CFI_STARTPROC
-	FRAME
-	pushl %edx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET edx,0
-	pushl %ecx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ecx,0
-	call __down_trylock
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
-	CFI_RESTORE ecx
-	popl %edx
-	CFI_ADJUST_CFA_OFFSET -4
-	CFI_RESTORE edx
-	ENDFRAME
-	ret
-	CFI_ENDPROC
-	ENDPROC(__down_failed_trylock)
-
-ENTRY(__up_wakeup)
-	CFI_STARTPROC
-	FRAME
-	pushl %edx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET edx,0
-	pushl %ecx
-	CFI_ADJUST_CFA_OFFSET 4
-	CFI_REL_OFFSET ecx,0
-	call __up
-	popl %ecx
-	CFI_ADJUST_CFA_OFFSET -4
-	CFI_RESTORE ecx
-	popl %edx
-	CFI_ADJUST_CFA_OFFSET -4
-	CFI_RESTORE edx
-	ENDFRAME
-	ret
-	CFI_ENDPROC
-	ENDPROC(__up_wakeup)
 
 /*
  * rw spinlock fallbacks
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 8b92d42..e009251 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -41,11 +41,6 @@
 	thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
 #endif	
 	
-	thunk __down_failed,__down
-	thunk_retrax __down_failed_interruptible,__down_interruptible
-	thunk_retrax __down_failed_trylock,__down_trylock
-	thunk __up_wakeup,__up
-
 #ifdef CONFIG_TRACE_IRQFLAGS
 	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
 	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index f582d6a..7419dbc 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -5,7 +5,7 @@
 extra-y := head.o vmlinux.lds
 
 
-obj-y := align.o entry.o irq.o coprocessor.o process.o ptrace.o semaphore.o  \
+obj-y := align.o entry.o irq.o coprocessor.o process.o ptrace.o \
 	 setup.o signal.o syscall.o time.o traps.o vectors.o platform.o  \
 	 pci-dma.o init_task.o io.o
 
diff --git a/arch/xtensa/kernel/semaphore.c b/arch/xtensa/kernel/semaphore.c
deleted file mode 100644
index 995c641..0000000
--- a/arch/xtensa/kernel/semaphore.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * arch/xtensa/kernel/semaphore.c
- *
- * Generic semaphore code. Buyer beware. Do your own specific changes
- * in <asm/semaphore-helper.h>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- *
- * Joe Taylor	<joe@tensilica.com, joetylr@yahoo.com>
- * Chris Zankel	<chris@zankel.net>
- * Marc Gauthier<marc@tensilica.com, marc@alumni.uwaterloo.ca>
- * Kevin Chea
- */
-
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/init.h>
-#include <asm/semaphore.h>
-#include <asm/errno.h>
-
-/*
- * These two _must_ execute atomically wrt each other.
- */
-
-static __inline__ void wake_one_more(struct semaphore * sem)
-{
-	atomic_inc((atomic_t *)&sem->sleepers);
-}
-
-static __inline__ int waking_non_zero(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->sleepers > 0) {
-		sem->sleepers--;
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_interruptible:
- *	1	got the lock
- *	0	go to sleep
- *	-EINTR	interrupted
- *
- * We must undo the sem->count down_interruptible() increment while we are
- * protected by the spinlock in order to make atomic this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-
-static __inline__ int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->sleepers > 0) {
-		sem->sleepers--;
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_trylock:
- *	1	failed to lock
- *	0	got the lock
- *
- * We must undo the sem->count down_trylock() increment while we are
- * protected by the spinlock in order to make atomic this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-
-static __inline__ int waking_non_zero_trylock(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 1;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->sleepers <= 0)
-		atomic_inc(&sem->count);
-	else {
-		sem->sleepers--;
-		ret = 0;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-DEFINE_SPINLOCK(semaphore_wake_lock);
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-#define DOWN_VAR				\
-	struct task_struct *tsk = current;	\
-	wait_queue_t wait;			\
-	init_waitqueue_entry(&wait, tsk);
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	tsk->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		tsk->state = (task_state);	\
-	}					\
-	tsk->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DOWN_VAR
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-	DOWN_VAR
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, tsk);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 60dbdb4..6e52cdd 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -26,7 +26,6 @@
 #include <asm/io.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
-#include <asm/semaphore.h>
 #ifdef CONFIG_BLK_DEV_FD
 #include <asm/floppy.h>
 #endif
@@ -71,14 +70,6 @@
 EXPORT_SYMBOL(__udivdi3);
 EXPORT_SYMBOL(__umoddi3);
 
-/*
- * Semaphore operations
- */
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
-
 #ifdef CONFIG_NET
 /*
  * Networking support
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index a697fb6..a498a6c 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -4,6 +4,8 @@
  *  Copyright (C) 2000       Andrew Henroid
  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (c) 2008 Intel Corporation
+ *   Author: Matthew Wilcox <willy@linux.intel.com>
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
@@ -37,15 +39,18 @@
 #include <linux/workqueue.h>
 #include <linux/nmi.h>
 #include <linux/acpi.h>
-#include <acpi/acpi.h>
-#include <asm/io.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/processor.h>
-#include <asm/uaccess.h>
-
 #include <linux/efi.h>
 #include <linux/ioport.h>
 #include <linux/list.h>
+#include <linux/jiffies.h>
+#include <linux/semaphore.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#include <acpi/acpi.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/processor.h>
 
 #define _COMPONENT		ACPI_OS_SERVICES
 ACPI_MODULE_NAME("osl");
@@ -764,7 +769,6 @@
 {
 	struct semaphore *sem = NULL;
 
-
 	sem = acpi_os_allocate(sizeof(struct semaphore));
 	if (!sem)
 		return AE_NO_MEMORY;
@@ -791,12 +795,12 @@
 {
 	struct semaphore *sem = (struct semaphore *)handle;
 
-
 	if (!sem)
 		return AE_BAD_PARAMETER;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Deleting semaphore[%p].\n", handle));
 
+	BUG_ON(!list_empty(&sem->wait_list));
 	kfree(sem);
 	sem = NULL;
 
@@ -804,21 +808,15 @@
 }
 
 /*
- * TODO: The kernel doesn't have a 'down_timeout' function -- had to
- * improvise.  The process is to sleep for one scheduler quantum
- * until the semaphore becomes available.  Downside is that this
- * may result in starvation for timeout-based waits when there's
- * lots of semaphore activity.
- *
  * TODO: Support for units > 1?
  */
 acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout)
 {
 	acpi_status status = AE_OK;
 	struct semaphore *sem = (struct semaphore *)handle;
+	long jiffies;
 	int ret = 0;
 
-
 	if (!sem || (units < 1))
 		return AE_BAD_PARAMETER;
 
@@ -828,58 +826,14 @@
 	ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Waiting for semaphore[%p|%d|%d]\n",
 			  handle, units, timeout));
 
-	/*
-	 * This can be called during resume with interrupts off.
-	 * Like boot-time, we should be single threaded and will
-	 * always get the lock if we try -- timeout or not.
-	 * If this doesn't succeed, then we will oops courtesy of
-	 * might_sleep() in down().
-	 */
-	if (!down_trylock(sem))
-		return AE_OK;
-
-	switch (timeout) {
-		/*
-		 * No Wait:
-		 * --------
-		 * A zero timeout value indicates that we shouldn't wait - just
-		 * acquire the semaphore if available otherwise return AE_TIME
-		 * (a.k.a. 'would block').
-		 */
-	case 0:
-		if (down_trylock(sem))
-			status = AE_TIME;
-		break;
-
-		/*
-		 * Wait Indefinitely:
-		 * ------------------
-		 */
-	case ACPI_WAIT_FOREVER:
-		down(sem);
-		break;
-
-		/*
-		 * Wait w/ Timeout:
-		 * ----------------
-		 */
-	default:
-		// TODO: A better timeout algorithm?
-		{
-			int i = 0;
-			static const int quantum_ms = 1000 / HZ;
-
-			ret = down_trylock(sem);
-			for (i = timeout; (i > 0 && ret != 0); i -= quantum_ms) {
-				schedule_timeout_interruptible(1);
-				ret = down_trylock(sem);
-			}
-
-			if (ret != 0)
-				status = AE_TIME;
-		}
-		break;
-	}
+	if (timeout == ACPI_WAIT_FOREVER)
+		jiffies = MAX_SCHEDULE_TIMEOUT;
+	else
+		jiffies = msecs_to_jiffies(timeout);
+	
+	ret = down_timeout(sem, jiffies);
+	if (ret)
+		status = AE_TIME;
 
 	if (ACPI_FAILURE(status)) {
 		ACPI_DEBUG_PRINT((ACPI_DB_MUTEX,
@@ -902,7 +856,6 @@
 {
 	struct semaphore *sem = (struct semaphore *)handle;
 
-
 	if (!sem || (units < 1))
 		return AE_BAD_PARAMETER;
 
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 84cdf90..349b6ed 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -116,6 +116,10 @@
 		err = -EAGAIN;
 		if (!bytes_read && (filp->f_flags & O_NONBLOCK))
 			goto out;
+		if (bytes_read < 0) {
+			err = bytes_read;
+			goto out;
+		}
 
 		err = -EFAULT;
 		while (bytes_read && size) {
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 6b658d8..6d2f0c8 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -64,6 +64,7 @@
 	tristate "Support for PCI-attached cryptographic adapters"
 	depends on S390
 	select ZCRYPT_MONOLITHIC if ZCRYPT="y"
+	select HW_RANDOM
 	help
 	  Select this option if you want to use a PCI-attached cryptographic
 	  adapter like:
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index faa7ce3..a47fe64 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -467,6 +467,31 @@
 	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
 }
 
+/*
+ * Trivial helpers to strip endian annotation and compare; the
+ * endianness doesn't actually matter since we just need a stable
+ * order for the RB tree.
+ */
+static int be32_lt(__be32 a, __be32 b)
+{
+	return (__force u32) a < (__force u32) b;
+}
+
+static int be32_gt(__be32 a, __be32 b)
+{
+	return (__force u32) a > (__force u32) b;
+}
+
+static int be64_lt(__be64 a, __be64 b)
+{
+	return (__force u64) a < (__force u64) b;
+}
+
+static int be64_gt(__be64 a, __be64 b)
+{
+	return (__force u64) a > (__force u64) b;
+}
+
 static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 {
 	struct rb_node **link = &cm.listen_service_table.rb_node;
@@ -492,9 +517,9 @@
 			link = &(*link)->rb_left;
 		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
 			link = &(*link)->rb_right;
-		else if (service_id < cur_cm_id_priv->id.service_id)
+		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
 			link = &(*link)->rb_left;
-		else if (service_id > cur_cm_id_priv->id.service_id)
+		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
 			link = &(*link)->rb_right;
 		else if (data_cmp < 0)
 			link = &(*link)->rb_left;
@@ -527,9 +552,9 @@
 			node = node->rb_left;
 		else if (device > cm_id_priv->id.device)
 			node = node->rb_right;
-		else if (service_id < cm_id_priv->id.service_id)
+		else if (be64_lt(service_id, cm_id_priv->id.service_id))
 			node = node->rb_left;
-		else if (service_id > cm_id_priv->id.service_id)
+		else if (be64_gt(service_id, cm_id_priv->id.service_id))
 			node = node->rb_right;
 		else if (data_cmp < 0)
 			node = node->rb_left;
@@ -552,13 +577,13 @@
 		parent = *link;
 		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
 					     remote_id_node);
-		if (remote_id < cur_timewait_info->work.remote_id)
+		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
 			link = &(*link)->rb_left;
-		else if (remote_id > cur_timewait_info->work.remote_id)
+		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
 			link = &(*link)->rb_right;
-		else if (remote_ca_guid < cur_timewait_info->remote_ca_guid)
+		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
 			link = &(*link)->rb_left;
-		else if (remote_ca_guid > cur_timewait_info->remote_ca_guid)
+		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
 			link = &(*link)->rb_right;
 		else
 			return cur_timewait_info;
@@ -578,13 +603,13 @@
 	while (node) {
 		timewait_info = rb_entry(node, struct cm_timewait_info,
 					 remote_id_node);
-		if (remote_id < timewait_info->work.remote_id)
+		if (be32_lt(remote_id, timewait_info->work.remote_id))
 			node = node->rb_left;
-		else if (remote_id > timewait_info->work.remote_id)
+		else if (be32_gt(remote_id, timewait_info->work.remote_id))
 			node = node->rb_right;
-		else if (remote_ca_guid < timewait_info->remote_ca_guid)
+		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
 			node = node->rb_left;
-		else if (remote_ca_guid > timewait_info->remote_ca_guid)
+		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
 			node = node->rb_right;
 		else
 			return timewait_info;
@@ -605,13 +630,13 @@
 		parent = *link;
 		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
 					     remote_qp_node);
-		if (remote_qpn < cur_timewait_info->remote_qpn)
+		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
 			link = &(*link)->rb_left;
-		else if (remote_qpn > cur_timewait_info->remote_qpn)
+		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
 			link = &(*link)->rb_right;
-		else if (remote_ca_guid < cur_timewait_info->remote_ca_guid)
+		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
 			link = &(*link)->rb_left;
-		else if (remote_ca_guid > cur_timewait_info->remote_ca_guid)
+		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
 			link = &(*link)->rb_right;
 		else
 			return cur_timewait_info;
@@ -635,9 +660,9 @@
 		parent = *link;
 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
 					  sidr_id_node);
-		if (remote_id < cur_cm_id_priv->id.remote_id)
+		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
 			link = &(*link)->rb_left;
-		else if (remote_id > cur_cm_id_priv->id.remote_id)
+		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
 			link = &(*link)->rb_right;
 		else {
 			int cmp;
@@ -2848,7 +2873,7 @@
 	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
 			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
 	sidr_req_msg->request_id = cm_id_priv->id.local_id;
-	sidr_req_msg->pkey = cpu_to_be16(param->path->pkey);
+	sidr_req_msg->pkey = param->path->pkey;
 	sidr_req_msg->service_id = param->service_id;
 
 	if (param->private_data && param->private_data_len)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d81c156..671f137 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1289,7 +1289,7 @@
 	new_cm_id = rdma_create_id(listen_id->id.event_handler,
 				   listen_id->id.context,
 				   RDMA_PS_TCP);
-	if (!new_cm_id) {
+	if (IS_ERR(new_cm_id)) {
 		ret = -ENOMEM;
 		goto out;
 	}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 06d502c..1286dc1 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -158,8 +158,7 @@
 #endif
 	}
 
-	list_splice(&pool->dirty_list, &unmap_list);
-	INIT_LIST_HEAD(&pool->dirty_list);
+	list_splice_init(&pool->dirty_list, &unmap_list);
 	pool->dirty_len = 0;
 
 	spin_unlock_irq(&pool->pool_lock);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 15937eb..ca4cf3a 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -614,7 +614,7 @@
 	if (!ctx->cm_id->device)
 		goto out;
 
-	resp.node_guid = ctx->cm_id->device->node_guid;
+	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
 	resp.port_num = ctx->cm_id->port_num;
 	switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c75eb6c..2cad8b4 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -81,13 +81,13 @@
 
 struct ib_uverbs_event_file {
 	struct kref				ref;
-	struct file			       *file;
 	struct ib_uverbs_file		       *uverbs_file;
 	spinlock_t				lock;
-	int					is_async;
 	wait_queue_head_t			poll_wait;
 	struct fasync_struct		       *async_queue;
 	struct list_head			event_list;
+	int					is_async;
+	int					is_closed;
 };
 
 struct ib_uverbs_file {
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 495c803..2c3bff5 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1065,6 +1065,7 @@
 	attr.srq           = srq;
 	attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 	attr.qp_type       = cmd.qp_type;
+	attr.create_flags  = 0;
 
 	attr.cap.max_send_wr     = cmd.max_send_wr;
 	attr.cap.max_recv_wr     = cmd.max_recv_wr;
@@ -1462,7 +1463,6 @@
 		next->num_sge    = user_wr->num_sge;
 		next->opcode     = user_wr->opcode;
 		next->send_flags = user_wr->send_flags;
-		next->imm_data   = (__be32 __force) user_wr->imm_data;
 
 		if (is_ud) {
 			next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
@@ -1475,14 +1475,24 @@
 			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
 		} else {
 			switch (next->opcode) {
-			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
+				next->ex.imm_data =
+					(__be32 __force) user_wr->ex.imm_data;
+			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_READ:
 				next->wr.rdma.remote_addr =
 					user_wr->wr.rdma.remote_addr;
 				next->wr.rdma.rkey        =
 					user_wr->wr.rdma.rkey;
 				break;
+			case IB_WR_SEND_WITH_IMM:
+				next->ex.imm_data =
+					(__be32 __force) user_wr->ex.imm_data;
+				break;
+			case IB_WR_SEND_WITH_INV:
+				next->ex.invalidate_rkey =
+					user_wr->ex.invalidate_rkey;
+				break;
 			case IB_WR_ATOMIC_CMP_AND_SWP:
 			case IB_WR_ATOMIC_FETCH_AND_ADD:
 				next->wr.atomic.remote_addr =
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 7c2ac39..f49f946 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -352,7 +352,7 @@
 	struct ib_uverbs_event *entry, *tmp;
 
 	spin_lock_irq(&file->lock);
-	file->file = NULL;
+	file->is_closed = 1;
 	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
 		if (entry->counter)
 			list_del(&entry->obj_list);
@@ -390,7 +390,7 @@
 		return;
 
 	spin_lock_irqsave(&file->lock, flags);
-	if (!file->file) {
+	if (file->is_closed) {
 		spin_unlock_irqrestore(&file->lock, flags);
 		return;
 	}
@@ -423,7 +423,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&file->async_file->lock, flags);
-	if (!file->async_file->file) {
+	if (!file->async_file->is_closed) {
 		spin_unlock_irqrestore(&file->async_file->lock, flags);
 		return;
 	}
@@ -509,6 +509,7 @@
 	ev_file->uverbs_file = uverbs_file;
 	ev_file->async_queue = NULL;
 	ev_file->is_async    = is_async;
+	ev_file->is_closed   = 0;
 
 	*fd = get_unused_fd();
 	if (*fd < 0) {
@@ -516,25 +517,18 @@
 		goto err;
 	}
 
-	filp = get_empty_filp();
-	if (!filp) {
-		ret = -ENFILE;
-		goto err_fd;
-	}
-
-	ev_file->file      = filp;
-
 	/*
 	 * fops_get() can't fail here, because we're coming from a
 	 * system call on a uverbs file, which will already have a
 	 * module reference.
 	 */
-	filp->f_op 	   = fops_get(&uverbs_event_fops);
-	filp->f_path.mnt 	   = mntget(uverbs_event_mnt);
-	filp->f_path.dentry 	   = dget(uverbs_event_mnt->mnt_root);
-	filp->f_mapping    = filp->f_path.dentry->d_inode->i_mapping;
-	filp->f_flags      = O_RDONLY;
-	filp->f_mode       = FMODE_READ;
+	filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root),
+			  FMODE_READ, fops_get(&uverbs_event_fops));
+	if (!filp) {
+		ret = -ENFILE;
+		goto err_fd;
+	}
+
 	filp->private_data = ev_file;
 
 	return filp;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 86ed8af..0504208 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -248,7 +248,9 @@
 		  struct ib_srq_attr *srq_attr,
 		  enum ib_srq_attr_mask srq_attr_mask)
 {
-	return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
+	return srq->device->modify_srq ?
+		srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
+		-ENOSYS;
 }
 EXPORT_SYMBOL(ib_modify_srq);
 
@@ -628,6 +630,13 @@
 }
 EXPORT_SYMBOL(ib_create_cq);
 
+int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+	return cq->device->modify_cq ?
+		cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_modify_cq);
+
 int ib_destroy_cq(struct ib_cq *cq)
 {
 	if (atomic_read(&cq->usecnt))
@@ -672,6 +681,9 @@
 {
 	struct ib_mr *mr;
 
+	if (!pd->device->reg_phys_mr)
+		return ERR_PTR(-ENOSYS);
+
 	mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
 				     mr_access_flags, iova_start);
 
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index f283a9f..113f3c0 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -130,10 +130,10 @@
 		tx_desc->status = 0;
 
 		/* Set TXP_HTXD_UNINIT */
-		__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+		__raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
 			     (void __iomem *) txp_desc + C2_TXP_ADDR);
 		__raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
-		__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+		__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
 			     (void __iomem *) txp_desc + C2_TXP_FLAGS);
 
 		elem->skb = NULL;
@@ -179,13 +179,13 @@
 		rx_desc->status = 0;
 
 		/* Set RXP_HRXD_UNINIT */
-		__raw_writew(cpu_to_be16(RXP_HRXD_OK),
+		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK),
 		       (void __iomem *) rxp_desc + C2_RXP_STATUS);
 		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
 		__raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
-		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+		__raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
 			     (void __iomem *) rxp_desc + C2_RXP_ADDR);
-		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
 			     (void __iomem *) rxp_desc + C2_RXP_FLAGS);
 
 		elem->skb = NULL;
@@ -239,10 +239,11 @@
 	rxp_hdr->flags = RXP_HRXD_READY;
 
 	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
-	__raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
+	__raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
 		     elem->hw_desc + C2_RXP_LEN);
-	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
-	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+	__raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
+		     elem->hw_desc + C2_RXP_FLAGS);
 
 	elem->skb = skb;
 	elem->mapaddr = mapaddr;
@@ -290,9 +291,9 @@
 		__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
 		__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
 		__raw_writew(0, elem->hw_desc + C2_RXP_LEN);
-		__raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+		__raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
 			     elem->hw_desc + C2_RXP_ADDR);
-		__raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
 			     elem->hw_desc + C2_RXP_FLAGS);
 
 		if (elem->skb) {
@@ -346,16 +347,16 @@
 					     elem->hw_desc + C2_TXP_LEN);
 				__raw_writeq(0,
 					     elem->hw_desc + C2_TXP_ADDR);
-				__raw_writew(cpu_to_be16(TXP_HTXD_DONE),
+				__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
 					     elem->hw_desc + C2_TXP_FLAGS);
 				c2_port->netstats.tx_dropped++;
 				break;
 			} else {
 				__raw_writew(0,
 					     elem->hw_desc + C2_TXP_LEN);
-				__raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+				__raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
 					     elem->hw_desc + C2_TXP_ADDR);
-				__raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+				__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
 					     elem->hw_desc + C2_TXP_FLAGS);
 			}
 
@@ -390,7 +391,7 @@
 	for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
 	     elem = elem->next) {
 		txp_htxd.flags =
-		    be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
+		    be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS));
 
 		if (txp_htxd.flags != TXP_HTXD_DONE)
 			break;
@@ -398,7 +399,7 @@
 		if (netif_msg_tx_done(c2_port)) {
 			/* PCI reads are expensive in fast path */
 			txp_htxd.len =
-			    be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN));
+			    be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN));
 			pr_debug("%s: tx done slot %3Zu status 0x%x len "
 				"%5u bytes\n",
 				netdev->name, elem - tx_ring->start,
@@ -448,10 +449,12 @@
 	/* Write the descriptor to the adapter's rx ring */
 	__raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
 	__raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
-	__raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
+	__raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
 		     elem->hw_desc + C2_RXP_LEN);
-	__raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR);
-	__raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+	__raw_writeq((__force u64) cpu_to_be64(elem->mapaddr),
+		     elem->hw_desc + C2_RXP_ADDR);
+	__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
+		     elem->hw_desc + C2_RXP_FLAGS);
 
 	pr_debug("packet dropped\n");
 	c2_port->netstats.rx_dropped++;
@@ -653,7 +656,7 @@
 	     i++, elem++) {
 		rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
 		rxp_hdr->flags = 0;
-		__raw_writew(cpu_to_be16(RXP_HRXD_READY),
+		__raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
 			     elem->hw_desc + C2_RXP_FLAGS);
 	}
 
@@ -787,9 +790,12 @@
 	elem->maplen = maplen;
 
 	/* Tell HW to xmit */
-	__raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR);
-	__raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN);
-	__raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS);
+	__raw_writeq((__force u64) cpu_to_be64(mapaddr),
+		     elem->hw_desc + C2_TXP_ADDR);
+	__raw_writew((__force u16) cpu_to_be16(maplen),
+		     elem->hw_desc + C2_TXP_LEN);
+	__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
+		     elem->hw_desc + C2_TXP_FLAGS);
 
 	c2_port->netstats.tx_packets++;
 	c2_port->netstats.tx_bytes += maplen;
@@ -810,11 +816,11 @@
 			elem->maplen = maplen;
 
 			/* Tell HW to xmit */
-			__raw_writeq(cpu_to_be64(mapaddr),
+			__raw_writeq((__force u64) cpu_to_be64(mapaddr),
 				     elem->hw_desc + C2_TXP_ADDR);
-			__raw_writew(cpu_to_be16(maplen),
+			__raw_writew((__force u16) cpu_to_be16(maplen),
 				     elem->hw_desc + C2_TXP_LEN);
-			__raw_writew(cpu_to_be16(TXP_HTXD_READY),
+			__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
 				     elem->hw_desc + C2_TXP_FLAGS);
 
 			c2_port->netstats.tx_packets++;
@@ -1005,7 +1011,7 @@
 	/* Remap the adapter PCI registers in BAR4 */
 	mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
 				    sizeof(struct c2_adapter_pci_regs));
-	if (mmio_regs == 0UL) {
+	if (!mmio_regs) {
 		printk(KERN_ERR PFX
 			"Unable to remap adapter PCI registers in BAR4\n");
 		ret = -EIO;
@@ -1029,10 +1035,10 @@
 	}
 
 	/* Validate the adapter version */
-	if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
+	if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
 		printk(KERN_ERR PFX "Version mismatch "
 			"[fw=%u, c2=%u], Adapter not claimed\n",
-			be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)),
+			be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)),
 			C2_VERSION);
 		ret = -EINVAL;
 		iounmap(mmio_regs);
@@ -1040,12 +1046,12 @@
 	}
 
 	/* Validate the adapter IVN */
-	if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
+	if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
 		printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
 		       "the OpenIB device support kit. "
 		       "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
-			be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)),
-			C2_IVN);
+		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)),
+		       C2_IVN);
 		ret = -EINVAL;
 		iounmap(mmio_regs);
 		goto bail2;
@@ -1068,7 +1074,7 @@
 
 	/* Get the last RX index */
 	c2dev->cur_rx =
-	    (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) -
+	    (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) -
 	     0xffffc000) / sizeof(struct c2_rxp_desc);
 
 	/* Request an interrupt line for the driver */
@@ -1090,7 +1096,7 @@
 	}
 
 	/* Save off the actual size prior to unmapping mmio_regs */
-	kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE));
+	kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE));
 
 	/* Unmap the adapter PCI registers in BAR4 */
 	iounmap(mmio_regs);
@@ -1109,7 +1115,7 @@
 	/* Remap the adapter HRXDQ PA space to kernel VA space */
 	c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
 					       C2_RXP_HRXDQ_SIZE);
-	if (c2dev->mmio_rxp_ring == 0UL) {
+	if (!c2dev->mmio_rxp_ring) {
 		printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
 		ret = -EIO;
 		goto bail6;
@@ -1118,7 +1124,7 @@
 	/* Remap the adapter HTXDQ PA space to kernel VA space */
 	c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
 					       C2_TXP_HTXDQ_SIZE);
-	if (c2dev->mmio_txp_ring == 0UL) {
+	if (!c2dev->mmio_txp_ring) {
 		printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
 		ret = -EIO;
 		goto bail7;
@@ -1129,7 +1135,7 @@
 
 	/* Remap the PCI registers in adapter BAR0 to kernel VA space */
 	c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
-	if (c2dev->regs == 0UL) {
+	if (!c2dev->regs) {
 		printk(KERN_ERR PFX "Unable to remap BAR0\n");
 		ret = -EIO;
 		goto bail8;
@@ -1139,7 +1145,7 @@
 	c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
 	c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
 				     kva_map_size);
-	if (c2dev->kva == 0UL) {
+	if (!c2dev->kva) {
 		printk(KERN_ERR PFX "Unable to remap BAR4\n");
 		ret = -EIO;
 		goto bail9;
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
index fa58200..ed38ab8 100644
--- a/drivers/infiniband/hw/amso1100/c2.h
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -346,7 +346,7 @@
 	//	spinlock_t aeq_lock;
 	//	spinlock_t rnic_lock;
 
-	u16 *hint_count;
+	__be16 *hint_count;
 	dma_addr_t hint_count_dma;
 	u16 hints_read;
 
@@ -425,10 +425,10 @@
 #endif
 
 #define C2_SET_CUR_RX(c2dev, cur_rx) \
-	__raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
+	__raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
 
 #define C2_GET_CUR_RX(c2dev) \
-	be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092))
+	be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092))
 
 static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
 {
@@ -485,8 +485,8 @@
 extern int c2_rnic_init(struct c2_dev *c2dev);
 extern void c2_rnic_term(struct c2_dev *c2dev);
 extern void c2_rnic_interrupt(struct c2_dev *c2dev);
-extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
-extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
+extern int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
+extern int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
 
 /* QPs */
 extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
@@ -545,7 +545,7 @@
 extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
 			     struct sp_chunk **root);
 extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
-extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-			  dma_addr_t *dma_addr, gfp_t gfp_mask);
-extern void c2_free_mqsp(u16 * mqsp);
+extern __be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+			     dma_addr_t *dma_addr, gfp_t gfp_mask);
+extern void c2_free_mqsp(__be16* mqsp);
 #endif
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index a31439b..62af742 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -61,7 +61,7 @@
 	default:
 		printk(KERN_ERR PFX
 		       "%s - Unable to convert CM status: %d\n",
-		       __FUNCTION__, c2_status);
+		       __func__, c2_status);
 		return -EIO;
 	}
 }
@@ -193,9 +193,9 @@
 		pr_debug("%s: event = %s, user_context=%llx, "
 			"resource_type=%x, "
 			"resource=%x, qp_state=%s\n",
-			__FUNCTION__,
+			__func__,
 			to_event_str(event_id),
-			(unsigned long long) be64_to_cpu(wr->ae.ae_generic.user_context),
+			(unsigned long long) wr->ae.ae_generic.user_context,
 			be32_to_cpu(wr->ae.ae_generic.resource_type),
 			be32_to_cpu(wr->ae.ae_generic.resource),
 			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
@@ -259,7 +259,7 @@
 			BUG_ON(1);
 			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
 				"CM_ID=%p\n",
-				__FUNCTION__, __LINE__,
+				__func__, __LINE__,
 				event_id, qp, cm_id);
 			break;
 		}
@@ -276,7 +276,7 @@
 		pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
 		if (event_id != CCAE_CONNECTION_REQUEST) {
 			pr_debug("%s: Invalid event_id: %d\n",
-				__FUNCTION__, event_id);
+				__func__, event_id);
 			break;
 		}
 		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
index 0315f99..e911016 100644
--- a/drivers/infiniband/hw/amso1100/c2_alloc.c
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -87,8 +87,8 @@
 	}
 }
 
-u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
-		   dma_addr_t *dma_addr, gfp_t gfp_mask)
+__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+		      dma_addr_t *dma_addr, gfp_t gfp_mask)
 {
 	u16 mqsp;
 
@@ -113,14 +113,14 @@
 		*dma_addr = head->dma_addr +
 			    ((unsigned long) &(head->shared_ptr[mqsp]) -
 			     (unsigned long) head);
-		pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
+		pr_debug("%s addr %p dma_addr %llx\n", __func__,
 			 &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
-		return &(head->shared_ptr[mqsp]);
+		return (__force __be16 *) &(head->shared_ptr[mqsp]);
 	}
 	return NULL;
 }
 
-void c2_free_mqsp(u16 * mqsp)
+void c2_free_mqsp(__be16 *mqsp)
 {
 	struct sp_chunk *head;
 	u16 idx;
@@ -129,7 +129,7 @@
 	head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
 
 	/* Link head to new mqsp */
-	*mqsp = head->head;
+	*mqsp = (__force __be16) head->head;
 
 	/* Compute the shared_ptr index */
 	idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index d2b3366..bb17cce 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -422,8 +422,8 @@
 		goto bail1;
 
 	reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
-
-	vq_repbuf_free(c2dev, reply);
+	if (reply)
+		vq_repbuf_free(c2dev, reply);
       bail1:
 	vq_req_free(c2dev, vq_req);
       bail0:
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
index 0d0bc33..3b50954 100644
--- a/drivers/infiniband/hw/amso1100/c2_intr.c
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -174,7 +174,11 @@
 		return;
 	}
 
-	err = c2_errno(reply_msg);
+	if (reply_msg)
+		err = c2_errno(reply_msg);
+	else
+		err = -ENOMEM;
+
 	if (!err) switch (req->event) {
 	case IW_CM_EVENT_ESTABLISHED:
 		c2_set_qp_state(req->qp,
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
index 1e4f464..b506fe2 100644
--- a/drivers/infiniband/hw/amso1100/c2_mm.c
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -45,7 +45,7 @@
  *	  Reply buffer _is_ freed by this function.
  */
 static int
-send_pbl_messages(struct c2_dev *c2dev, u32 stag_index,
+send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index,
 		  unsigned long va, u32 pbl_depth,
 		  struct c2_vq_req *vq_req, int pbl_type)
 {
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c
index b88a755..0cddc49 100644
--- a/drivers/infiniband/hw/amso1100/c2_mq.c
+++ b/drivers/infiniband/hw/amso1100/c2_mq.c
@@ -64,7 +64,7 @@
 		q->priv = (q->priv + 1) % q->q_size;
 		q->hint_count++;
 		/* Update peer's offset. */
-		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+		__raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
 	}
 }
 
@@ -105,7 +105,7 @@
 #endif
 		q->priv = (q->priv + 1) % q->q_size;
 		/* Update peer's offset. */
-		__raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
+		__raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
 	}
 }
 
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
index 9185bbb..acede00 100644
--- a/drivers/infiniband/hw/amso1100/c2_mq.h
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -75,7 +75,7 @@
 	u16 hint_count;
 	u16 priv;
 	struct c2_mq_shared __iomem *peer;
-	u16 *shared;
+	__be16 *shared;
 	dma_addr_t shared_dma;
 	u32 q_size;
 	u32 msg_size;
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 7a6cece..e10d27a 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -67,7 +67,7 @@
 {
 	struct c2_dev *c2dev = to_c2dev(ibdev);
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	*props = c2dev->props;
 	return 0;
@@ -76,7 +76,7 @@
 static int c2_query_port(struct ib_device *ibdev,
 			 u8 port, struct ib_port_attr *props)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	props->max_mtu = IB_MTU_4096;
 	props->lid = 0;
@@ -102,14 +102,14 @@
 			  u8 port, int port_modify_mask,
 			  struct ib_port_modify *props)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return 0;
 }
 
 static int c2_query_pkey(struct ib_device *ibdev,
 			 u8 port, u16 index, u16 * pkey)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	*pkey = 0;
 	return 0;
 }
@@ -119,7 +119,7 @@
 {
 	struct c2_dev *c2dev = to_c2dev(ibdev);
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
 	memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
 
@@ -134,7 +134,7 @@
 {
 	struct c2_ucontext *context;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	context = kmalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
 		return ERR_PTR(-ENOMEM);
@@ -144,14 +144,14 @@
 
 static int c2_dealloc_ucontext(struct ib_ucontext *context)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	kfree(context);
 	return 0;
 }
 
 static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return -ENOSYS;
 }
 
@@ -162,7 +162,7 @@
 	struct c2_pd *pd;
 	int err;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
 	if (!pd)
@@ -187,7 +187,7 @@
 
 static int c2_dealloc_pd(struct ib_pd *pd)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
 	kfree(pd);
 
@@ -196,13 +196,13 @@
 
 static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return ERR_PTR(-ENOSYS);
 }
 
 static int c2_ah_destroy(struct ib_ah *ah)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return -ENOSYS;
 }
 
@@ -230,7 +230,7 @@
 
 	qp = c2_find_qpn(c2dev, qpn);
 	pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
-		__FUNCTION__, qp, qpn, device,
+		__func__, qp, qpn, device,
 		(qp?atomic_read(&qp->refcount):0));
 
 	return (qp?&qp->ibqp:NULL);
@@ -243,13 +243,16 @@
 	struct c2_qp *qp;
 	int err;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
+
+	if (init_attr->create_flags)
+		return ERR_PTR(-EINVAL);
 
 	switch (init_attr->qp_type) {
 	case IB_QPT_RC:
 		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 		if (!qp) {
-			pr_debug("%s: Unable to allocate QP\n", __FUNCTION__);
+			pr_debug("%s: Unable to allocate QP\n", __func__);
 			return ERR_PTR(-ENOMEM);
 		}
 		spin_lock_init(&qp->lock);
@@ -266,7 +269,7 @@
 
 		break;
 	default:
-		pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__,
+		pr_debug("%s: Invalid QP type: %d\n", __func__,
 			init_attr->qp_type);
 		return ERR_PTR(-EINVAL);
 		break;
@@ -285,7 +288,7 @@
 	struct c2_qp *qp = to_c2qp(ib_qp);
 
 	pr_debug("%s:%u qp=%p,qp->state=%d\n",
-		__FUNCTION__, __LINE__,ib_qp,qp->state);
+		__func__, __LINE__, ib_qp, qp->state);
 	c2_free_qp(to_c2dev(ib_qp->device), qp);
 	kfree(qp);
 	return 0;
@@ -300,13 +303,13 @@
 
 	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq) {
-		pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__);
+		pr_debug("%s: Unable to allocate CQ\n", __func__);
 		return ERR_PTR(-ENOMEM);
 	}
 
 	err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
 	if (err) {
-		pr_debug("%s: error initializing CQ\n", __FUNCTION__);
+		pr_debug("%s: error initializing CQ\n", __func__);
 		kfree(cq);
 		return ERR_PTR(err);
 	}
@@ -318,7 +321,7 @@
 {
 	struct c2_cq *cq = to_c2cq(ib_cq);
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	c2_free_cq(to_c2dev(ib_cq->device), cq);
 	kfree(cq);
@@ -400,7 +403,7 @@
 	mr->umem = NULL;
 	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
 		"*iova_start %llx, first pa %llx, last pa %llx\n",
-		__FUNCTION__, page_shift, pbl_depth, total_len,
+		__func__, page_shift, pbl_depth, total_len,
 		(unsigned long long) *iova_start,
 	       	(unsigned long long) page_list[0],
 	       	(unsigned long long) page_list[pbl_depth-1]);
@@ -422,7 +425,7 @@
 	struct ib_phys_buf bl;
 	u64 kva = 0;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	/* AMSO1100 limit */
 	bl.size = 0xffffffff;
@@ -442,7 +445,7 @@
 	struct c2_pd *c2pd = to_c2pd(pd);
 	struct c2_mr *c2mr;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
 	if (!c2mr)
@@ -506,7 +509,7 @@
 	struct c2_mr *mr = to_c2mr(ib_mr);
 	int err;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
 	if (err)
@@ -523,14 +526,14 @@
 static ssize_t show_rev(struct class_device *cdev, char *buf)
 {
 	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return sprintf(buf, "%x\n", dev->props.hw_ver);
 }
 
 static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
 {
 	struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return sprintf(buf, "%x.%x.%x\n",
 		       (int) (dev->props.fw_ver >> 32),
 		       (int) (dev->props.fw_ver >> 16) & 0xffff,
@@ -539,13 +542,13 @@
 
 static ssize_t show_hca(struct class_device *cdev, char *buf)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return sprintf(buf, "AMSO1100\n");
 }
 
 static ssize_t show_board(struct class_device *cdev, char *buf)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
 }
 
@@ -575,13 +578,13 @@
 
 static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return -ENOSYS;
 }
 
 static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return -ENOSYS;
 }
 
@@ -592,13 +595,13 @@
 			  struct ib_grh *in_grh,
 			  struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	return -ENOSYS;
 }
 
 static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	/* Request a connection */
 	return c2_llp_connect(cm_id, iw_param);
@@ -606,7 +609,7 @@
 
 static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	/* Accept the new connection */
 	return c2_llp_accept(cm_id, iw_param);
@@ -616,7 +619,7 @@
 {
 	int err;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	err = c2_llp_reject(cm_id, pdata, pdata_len);
 	return err;
@@ -626,10 +629,10 @@
 {
 	int err;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	err = c2_llp_service_create(cm_id, backlog);
 	pr_debug("%s:%u err=%d\n",
-		__FUNCTION__, __LINE__,
+		__func__, __LINE__,
 		err);
 	return err;
 }
@@ -637,7 +640,7 @@
 static int c2_service_destroy(struct iw_cm_id *cm_id)
 {
 	int err;
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 
 	err = c2_llp_service_destroy(cm_id);
 
@@ -743,7 +746,7 @@
 	netdev = alloc_netdev(sizeof(*netdev), name, setup);
 	if (!netdev) {
 		printk(KERN_ERR PFX "%s -  etherdev alloc failed",
-			__FUNCTION__);
+			__func__);
 		return NULL;
 	}
 
@@ -780,7 +783,7 @@
 	if (ret)
 		goto out2;
 
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
 	dev->ibdev.owner = THIS_MODULE;
 	dev->ibdev.uverbs_cmd_mask =
@@ -873,13 +876,13 @@
 out2:
 	free_netdev(dev->pseudo_netdev);
 out3:
-	pr_debug("%s:%u ret=%d\n", __FUNCTION__, __LINE__, ret);
+	pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
 	return ret;
 }
 
 void c2_unregister_device(struct c2_dev *dev)
 {
-	pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
+	pr_debug("%s:%u\n", __func__, __LINE__);
 	unregister_netdev(dev->pseudo_netdev);
 	free_netdev(dev->pseudo_netdev);
 	ib_unregister_device(&dev->ibdev);
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index 01d0786..a6d8944 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -121,7 +121,7 @@
 	int new_state = to_ib_state(c2_state);
 
 	pr_debug("%s: qp[%p] state modify %s --> %s\n",
-	       __FUNCTION__,
+	       __func__,
 		qp,
 		to_ib_state_str(qp->state),
 		to_ib_state_str(new_state));
@@ -141,7 +141,7 @@
 	int err;
 
 	pr_debug("%s:%d qp=%p, %s --> %s\n",
-		__FUNCTION__, __LINE__,
+		__func__, __LINE__,
 		qp,
 		to_ib_state_str(qp->state),
 		to_ib_state_str(attr->qp_state));
@@ -224,7 +224,7 @@
 		qp->state = next_state;
 #ifdef DEBUG
 	else
-		pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err);
+		pr_debug("%s: c2_errno=%d\n", __func__, err);
 #endif
 	/*
 	 * If we're going to error and generating the event here, then
@@ -243,7 +243,7 @@
 	vq_req_free(c2dev, vq_req);
 
 	pr_debug("%s:%d qp=%p, cur_state=%s\n",
-		__FUNCTION__, __LINE__,
+		__func__, __LINE__,
 		qp,
 		to_ib_state_str(qp->state));
 	return err;
@@ -811,16 +811,24 @@
 
 		switch (ib_wr->opcode) {
 		case IB_WR_SEND:
-			if (ib_wr->send_flags & IB_SEND_SOLICITED) {
-				c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
-				msg_size = sizeof(struct c2wr_send_req);
+		case IB_WR_SEND_WITH_INV:
+			if (ib_wr->opcode == IB_WR_SEND) {
+				if (ib_wr->send_flags & IB_SEND_SOLICITED)
+					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
+				else
+					c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
+				wr.sqwr.send.remote_stag = 0;
 			} else {
-				c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
-				msg_size = sizeof(struct c2wr_send_req);
+				if (ib_wr->send_flags & IB_SEND_SOLICITED)
+					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV);
+				else
+					c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV);
+				wr.sqwr.send.remote_stag =
+					cpu_to_be32(ib_wr->ex.invalidate_rkey);
 			}
 
-			wr.sqwr.send.remote_stag = 0;
-			msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge;
+			msg_size = sizeof(struct c2wr_send_req) +
+				sizeof(struct c2_data_addr) * ib_wr->num_sge;
 			if (ib_wr->num_sge > qp->send_sgl_depth) {
 				err = -EINVAL;
 				break;
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index 1687c51..9a054c6 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -208,7 +208,7 @@
 /*
  * Add an IP address to the RNIC interface
  */
-int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
 {
 	struct c2_vq_req *vq_req;
 	struct c2wr_rnic_setconfig_req *wr;
@@ -270,7 +270,7 @@
 /*
  * Delete an IP address from the RNIC interface
  */
-int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
+int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
 {
 	struct c2_vq_req *vq_req;
 	struct c2wr_rnic_setconfig_req *wr;
@@ -455,7 +455,8 @@
 	     IB_DEVICE_CURR_QP_STATE_MOD |
 	     IB_DEVICE_SYS_IMAGE_GUID |
 	     IB_DEVICE_ZERO_STAG |
-	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+	     IB_DEVICE_MEM_WINDOW |
+	     IB_DEVICE_SEND_W_INV);
 
 	/* Allocate the qptr_array */
 	c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
@@ -506,17 +507,17 @@
 	mmio_regs = c2dev->kva;
 	/* Initialize the Verbs Request Queue */
 	c2_mq_req_init(&c2dev->req_vq, 0,
-		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)),
-		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
+		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)),
+		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
 		       mmio_regs +
-		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
+		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
 		       mmio_regs +
-		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)),
+		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)),
 		       C2_MQ_ADAPTER_TARGET);
 
 	/* Initialize the Verbs Reply Queue */
-	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE));
-	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
+	qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE));
+	msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
 	q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
 				      &c2dev->rep_vq.host_dma, GFP_KERNEL);
 	if (!q1_pages) {
@@ -524,7 +525,7 @@
 		goto bail1;
 	}
 	pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
-	pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
+	pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
 		 (unsigned long long) c2dev->rep_vq.host_dma);
 	c2_mq_rep_init(&c2dev->rep_vq,
 		   1,
@@ -532,12 +533,12 @@
 		   msgsize,
 		   q1_pages,
 		   mmio_regs +
-		   be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)),
+		   be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)),
 		   C2_MQ_HOST_TARGET);
 
 	/* Initialize the Asynchronus Event Queue */
-	qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE));
-	msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
+	qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE));
+	msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
 	q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
 				      &c2dev->aeq.host_dma, GFP_KERNEL);
 	if (!q2_pages) {
@@ -545,7 +546,7 @@
 		goto bail2;
 	}
 	pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
-	pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q2_pages,
+	pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
 		 (unsigned long long) c2dev->aeq.host_dma);
 	c2_mq_rep_init(&c2dev->aeq,
 		       2,
@@ -553,7 +554,7 @@
 		       msgsize,
 		       q2_pages,
 		       mmio_regs +
-		       be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)),
+		       be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)),
 		       C2_MQ_HOST_TARGET);
 
 	/* Initialize the verbs request allocator */
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
index cfdacb1..9ce7819b 100644
--- a/drivers/infiniband/hw/amso1100/c2_vq.c
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -197,7 +197,7 @@
 	 */
 	while (msg == NULL) {
 		pr_debug("%s:%d no available msg in VQ, waiting...\n",
-		       __FUNCTION__, __LINE__);
+		       __func__, __LINE__);
 		init_waitqueue_entry(&__wait, current);
 		add_wait_queue(&c2dev->req_vq_wo, &__wait);
 		spin_unlock(&c2dev->vqlock);
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
index 3ec6c43..c65fbdd 100644
--- a/drivers/infiniband/hw/amso1100/c2_wr.h
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -180,8 +180,8 @@
 };
 
 struct c2_netaddr {
-	u32 ip_addr;
-	u32 netmask;
+	__be32 ip_addr;
+	__be32 netmask;
 	u32 mtu;
 };
 
@@ -199,9 +199,9 @@
  * A Scatter Gather Entry.
  */
 struct c2_data_addr {
-	u32 stag;
-	u32 length;
-	u64 to;
+	__be32 stag;
+	__be32 length;
+	__be64 to;
 };
 
 /*
@@ -274,7 +274,7 @@
 	 * from the host to adapter by libccil, but we copy it anyway
 	 * to make the memcpy to the adapter better aligned.
 	 */
-	u32 wqe_count;
+	__be32 wqe_count;
 
 	/* Put these fields next so that later 32- and 64-bit
 	 * quantities are naturally aligned.
@@ -316,8 +316,8 @@
 struct c2wr_rnic_open_req {
 	struct c2wr_hdr hdr;
 	u64 user_context;
-	u16 flags;		/* See enum c2_rnic_flags */
-	u16 port_num;
+	__be16 flags;		/* See enum c2_rnic_flags */
+	__be16 port_num;
 } __attribute__((packed));
 
 struct c2wr_rnic_open_rep {
@@ -341,30 +341,30 @@
 struct c2wr_rnic_query_rep {
 	struct c2wr_hdr hdr;
 	u64 user_context;
-	u32 vendor_id;
-	u32 part_number;
-	u32 hw_version;
-	u32 fw_ver_major;
-	u32 fw_ver_minor;
-	u32 fw_ver_patch;
+	__be32 vendor_id;
+	__be32 part_number;
+	__be32 hw_version;
+	__be32 fw_ver_major;
+	__be32 fw_ver_minor;
+	__be32 fw_ver_patch;
 	char fw_ver_build_str[WR_BUILD_STR_LEN];
-	u32 max_qps;
-	u32 max_qp_depth;
+	__be32 max_qps;
+	__be32 max_qp_depth;
 	u32 max_srq_depth;
 	u32 max_send_sgl_depth;
 	u32 max_rdma_sgl_depth;
-	u32 max_cqs;
-	u32 max_cq_depth;
+	__be32 max_cqs;
+	__be32 max_cq_depth;
 	u32 max_cq_event_handlers;
-	u32 max_mrs;
+	__be32 max_mrs;
 	u32 max_pbl_depth;
-	u32 max_pds;
-	u32 max_global_ird;
+	__be32 max_pds;
+	__be32 max_global_ird;
 	u32 max_global_ord;
-	u32 max_qp_ird;
-	u32 max_qp_ord;
+	__be32 max_qp_ird;
+	__be32 max_qp_ord;
 	u32 flags;
-	u32 max_mws;
+	__be32 max_mws;
 	u32 pbe_range_low;
 	u32 pbe_range_high;
 	u32 max_srqs;
@@ -405,7 +405,7 @@
 struct c2wr_rnic_setconfig_req {
 	struct c2wr_hdr hdr;
 	u32 rnic_handle;
-	u32 option;		/* See c2_setconfig_cmd_t */
+	__be32 option;		/* See c2_setconfig_cmd_t */
 	/* variable data and pad. See c2_netaddr and c2_route */
 	u8 data[0];
 } __attribute__((packed)) ;
@@ -441,18 +441,18 @@
  */
 struct c2wr_cq_create_req {
 	struct c2wr_hdr hdr;
-	u64 shared_ht;
+	__be64 shared_ht;
 	u64 user_context;
-	u64 msg_pool;
+	__be64 msg_pool;
 	u32 rnic_handle;
-	u32 msg_size;
-	u32 depth;
+	__be32 msg_size;
+	__be32 depth;
 } __attribute__((packed)) ;
 
 struct c2wr_cq_create_rep {
 	struct c2wr_hdr hdr;
-	u32 mq_index;
-	u32 adapter_shared;
+	__be32 mq_index;
+	__be32 adapter_shared;
 	u32 cq_handle;
 } __attribute__((packed)) ;
 
@@ -585,40 +585,40 @@
 
 struct c2wr_qp_create_req {
 	struct c2wr_hdr hdr;
-	u64 shared_sq_ht;
-	u64 shared_rq_ht;
+	__be64 shared_sq_ht;
+	__be64 shared_rq_ht;
 	u64 user_context;
 	u32 rnic_handle;
 	u32 sq_cq_handle;
 	u32 rq_cq_handle;
-	u32 sq_depth;
-	u32 rq_depth;
+	__be32 sq_depth;
+	__be32 rq_depth;
 	u32 srq_handle;
 	u32 srq_limit;
-	u32 flags;		/* see enum c2wr_qp_flags */
-	u32 send_sgl_depth;
-	u32 recv_sgl_depth;
-	u32 rdma_write_sgl_depth;
-	u32 ord;
-	u32 ird;
+	__be32 flags;		/* see enum c2wr_qp_flags */
+	__be32 send_sgl_depth;
+	__be32 recv_sgl_depth;
+	__be32 rdma_write_sgl_depth;
+	__be32 ord;
+	__be32 ird;
 	u32 pd_id;
 } __attribute__((packed)) ;
 
 struct c2wr_qp_create_rep {
 	struct c2wr_hdr hdr;
-	u32 sq_depth;
-	u32 rq_depth;
+	__be32 sq_depth;
+	__be32 rq_depth;
 	u32 send_sgl_depth;
 	u32 recv_sgl_depth;
 	u32 rdma_write_sgl_depth;
 	u32 ord;
 	u32 ird;
-	u32 sq_msg_size;
-	u32 sq_mq_index;
-	u32 sq_mq_start;
-	u32 rq_msg_size;
-	u32 rq_mq_index;
-	u32 rq_mq_start;
+	__be32 sq_msg_size;
+	__be32 sq_mq_index;
+	__be32 sq_mq_start;
+	__be32 rq_msg_size;
+	__be32 rq_mq_index;
+	__be32 rq_mq_start;
 	u32 qp_handle;
 } __attribute__((packed)) ;
 
@@ -667,11 +667,11 @@
 	u32 stream_msg_length;
 	u32 rnic_handle;
 	u32 qp_handle;
-	u32 next_qp_state;
-	u32 ord;
-	u32 ird;
-	u32 sq_depth;
-	u32 rq_depth;
+	__be32 next_qp_state;
+	__be32 ord;
+	__be32 ird;
+	__be32 sq_depth;
+	__be32 rq_depth;
 	u32 llp_ep_handle;
 } __attribute__((packed)) ;
 
@@ -721,10 +721,10 @@
 	struct c2wr_hdr hdr;
 	u32 rnic_handle;
 	u32 qp_handle;
-	u32 remote_addr;
-	u16 remote_port;
+	__be32 remote_addr;
+	__be16 remote_port;
 	u16 pad;
-	u32 private_data_length;
+	__be32 private_data_length;
 	u8 private_data[0];	/* Private data in-line. */
 } __attribute__((packed)) ;
 
@@ -759,25 +759,25 @@
 
 struct c2wr_nsmr_register_req {
 	struct c2wr_hdr hdr;
-	u64 va;
+	__be64 va;
 	u32 rnic_handle;
-	u16 flags;
+	__be16 flags;
 	u8 stag_key;
 	u8 pad;
 	u32 pd_id;
-	u32 pbl_depth;
-	u32 pbe_size;
-	u32 fbo;
-	u32 length;
-	u32 addrs_length;
+	__be32 pbl_depth;
+	__be32 pbe_size;
+	__be32 fbo;
+	__be32 length;
+	__be32 addrs_length;
 	/* array of paddrs (must be aligned on a 64bit boundary) */
-	u64 paddrs[0];
+	__be64 paddrs[0];
 } __attribute__((packed)) ;
 
 struct c2wr_nsmr_register_rep {
 	struct c2wr_hdr hdr;
 	u32 pbl_depth;
-	u32 stag_index;
+	__be32 stag_index;
 } __attribute__((packed)) ;
 
 union c2wr_nsmr_register {
@@ -788,11 +788,11 @@
 struct c2wr_nsmr_pbl_req {
 	struct c2wr_hdr hdr;
 	u32 rnic_handle;
-	u32 flags;
-	u32 stag_index;
-	u32 addrs_length;
+	__be32 flags;
+	__be32 stag_index;
+	__be32 addrs_length;
 	/* array of paddrs (must be aligned on a 64bit boundary) */
-	u64 paddrs[0];
+	__be64 paddrs[0];
 } __attribute__((packed)) ;
 
 struct c2wr_nsmr_pbl_rep {
@@ -847,7 +847,7 @@
 struct c2wr_stag_dealloc_req {
 	struct c2wr_hdr hdr;
 	u32 rnic_handle;
-	u32 stag_index;
+	__be32 stag_index;
 } __attribute__((packed)) ;
 
 struct c2wr_stag_dealloc_rep {
@@ -949,7 +949,7 @@
 	u64 qp_user_context;	/* c2_user_qp_t * */
 	u32 qp_state;		/* Current QP State */
 	u32 handle;		/* QPID or EP Handle */
-	u32 bytes_rcvd;		/* valid for RECV WCs */
+	__be32 bytes_rcvd;		/* valid for RECV WCs */
 	u32 stag;
 } __attribute__((packed)) ;
 
@@ -984,8 +984,8 @@
  */
 struct c2wr_send_req {
 	struct c2_sq_hdr sq_hdr;
-	u32 sge_len;
-	u32 remote_stag;
+	__be32 sge_len;
+	__be32 remote_stag;
 	u8 data[0];		/* SGE array */
 } __attribute__((packed));
 
@@ -996,9 +996,9 @@
 
 struct c2wr_rdma_write_req {
 	struct c2_sq_hdr sq_hdr;
-	u64 remote_to;
-	u32 remote_stag;
-	u32 sge_len;
+	__be64 remote_to;
+	__be32 remote_stag;
+	__be32 sge_len;
 	u8 data[0];		/* SGE array */
 } __attribute__((packed));
 
@@ -1009,11 +1009,11 @@
 
 struct c2wr_rdma_read_req {
 	struct c2_sq_hdr sq_hdr;
-	u64 local_to;
-	u64 remote_to;
-	u32 local_stag;
-	u32 remote_stag;
-	u32 length;
+	__be64 local_to;
+	__be64 remote_to;
+	__be32 local_stag;
+	__be32 remote_stag;
+	__be32 length;
 } __attribute__((packed));
 
 union c2wr_rdma_read {
@@ -1113,9 +1113,9 @@
 struct c2wr_ae_hdr {
 	struct c2wr_hdr hdr;
 	u64 user_context;	/* user context for this res. */
-	u32 resource_type;	/* see enum c2_resource_indicator */
-	u32 resource;		/* handle for resource */
-	u32 qp_state;		/* current QP State */
+	__be32 resource_type;	/* see enum c2_resource_indicator */
+	__be32 resource;	/* handle for resource */
+	__be32 qp_state;	/* current QP State */
 } __attribute__((packed));
 
 /*
@@ -1124,11 +1124,11 @@
  */
 struct c2wr_ae_active_connect_results {
 	struct c2wr_ae_hdr ae_hdr;
-	u32 laddr;
-	u32 raddr;
-	u16 lport;
-	u16 rport;
-	u32 private_data_length;
+	__be32 laddr;
+	__be32 raddr;
+	__be16 lport;
+	__be16 rport;
+	__be32 private_data_length;
 	u8 private_data[0];	/* data is in-line in the msg. */
 } __attribute__((packed));
 
@@ -1142,11 +1142,11 @@
 struct c2wr_ae_connection_request {
 	struct c2wr_ae_hdr ae_hdr;
 	u32 cr_handle;		/* connreq handle (sock ptr) */
-	u32 laddr;
-	u32 raddr;
-	u16 lport;
-	u16 rport;
-	u32 private_data_length;
+	__be32 laddr;
+	__be32 raddr;
+	__be16 lport;
+	__be16 rport;
+	__be32 private_data_length;
 	u8 private_data[0];	/* data is in-line in the msg. */
 } __attribute__((packed));
 
@@ -1158,12 +1158,12 @@
 
 struct c2wr_init_req {
 	struct c2wr_hdr hdr;
-	u64 hint_count;
-	u64 q0_host_shared;
-	u64 q1_host_shared;
-	u64 q1_host_msg_pool;
-	u64 q2_host_shared;
-	u64 q2_host_msg_pool;
+	__be64 hint_count;
+	__be64 q0_host_shared;
+	__be64 q1_host_shared;
+	__be64 q1_host_msg_pool;
+	__be64 q2_host_shared;
+	__be64 q2_host_msg_pool;
 } __attribute__((packed));
 
 struct c2wr_init_rep {
@@ -1276,10 +1276,10 @@
 	struct c2wr_hdr hdr;
 	u64 user_context;	/* returned in AEs. */
 	u32 rnic_handle;
-	u32 local_addr;		/* local addr, or 0  */
-	u16 local_port;		/* 0 means "pick one" */
+	__be32 local_addr;		/* local addr, or 0  */
+	__be16 local_port;		/* 0 means "pick one" */
 	u16 pad;
-	u32 backlog;		/* tradional tcp listen bl */
+	__be32 backlog;		/* tradional tcp listen bl */
 } __attribute__((packed));
 
 struct c2wr_ep_listen_create_rep {
@@ -1340,7 +1340,7 @@
 	u32 rnic_handle;
 	u32 qp_handle;		/* QP to bind to this LLP conn */
 	u32 ep_handle;		/* LLP  handle to accept */
-	u32 private_data_length;
+	__be32 private_data_length;
 	u8 private_data[0];	/* data in-line in msg. */
 } __attribute__((packed));
 
@@ -1508,7 +1508,7 @@
 {
 	((struct c2wr_hdr *) wr)->sge_count = sge_count;
 }
-static __inline__ u32 c2_wr_get_wqe_count(void *wr)
+static __inline__ __be32 c2_wr_get_wqe_count(void *wr)
 {
 	return ((struct c2wr_hdr *) wr)->wqe_count;
 }
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 75f7b16..a8d24d5 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -45,16 +45,16 @@
 
 	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
 	if (!m) {
-		PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+		PDBG("%s couldn't allocate memory.\n", __func__);
 		return;
 	}
 	m->mem_id = MEM_PMRX;
 	m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
 	m->len = size;
-	PDBG("%s TPT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len);
+	PDBG("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
 	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
 	if (rc) {
-		PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+		PDBG("%s toectl returned error %d\n", __func__, rc);
 		kfree(m);
 		return;
 	}
@@ -82,17 +82,17 @@
 
 	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
 	if (!m) {
-		PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+		PDBG("%s couldn't allocate memory.\n", __func__);
 		return;
 	}
 	m->mem_id = MEM_PMRX;
 	m->addr = pbl_addr;
 	m->len = size;
 	PDBG("%s PBL addr 0x%x len %d depth %d\n",
-		__FUNCTION__, m->addr, m->len, npages);
+		__func__, m->addr, m->len, npages);
 	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
 	if (rc) {
-		PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+		PDBG("%s toectl returned error %d\n", __func__, rc);
 		kfree(m);
 		return;
 	}
@@ -144,16 +144,16 @@
 
 	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
 	if (!m) {
-		PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+		PDBG("%s couldn't allocate memory.\n", __func__);
 		return;
 	}
 	m->mem_id = MEM_PMRX;
 	m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
 	m->len = size;
-	PDBG("%s RQT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len);
+	PDBG("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
 	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
 	if (rc) {
-		PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+		PDBG("%s toectl returned error %d\n", __func__, rc);
 		kfree(m);
 		return;
 	}
@@ -177,16 +177,16 @@
 
 	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
 	if (!m) {
-		PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
+		PDBG("%s couldn't allocate memory.\n", __func__);
 		return;
 	}
 	m->mem_id = MEM_CM;
 	m->addr = hwtid * size;
 	m->len = size;
-	PDBG("%s TCB %d len %d\n", __FUNCTION__, m->addr, m->len);
+	PDBG("%s TCB %d len %d\n", __func__, m->addr, m->len);
 	rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
 	if (rc) {
-		PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
+		PDBG("%s toectl returned error %d\n", __func__, rc);
 		kfree(m);
 		return;
 	}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 03c5ff6..66eb703 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -140,7 +140,7 @@
 	struct t3_modify_qp_wr *wqe;
 	struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
 	if (!skb) {
-		PDBG("%s alloc_skb failed\n", __FUNCTION__);
+		PDBG("%s alloc_skb failed\n", __func__);
 		return -ENOMEM;
 	}
 	wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
@@ -225,7 +225,7 @@
 	}
 out:
 	mutex_unlock(&uctx->lock);
-	PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+	PDBG("%s qpid 0x%x\n", __func__, qpid);
 	return qpid;
 }
 
@@ -237,7 +237,7 @@
 	entry = kmalloc(sizeof *entry, GFP_KERNEL);
 	if (!entry)
 		return;
-	PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+	PDBG("%s qpid 0x%x\n", __func__, qpid);
 	entry->qpid = qpid;
 	mutex_lock(&uctx->lock);
 	list_add_tail(&entry->entry, &uctx->qpids);
@@ -300,7 +300,7 @@
 	if (!kernel_domain)
 		wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
 					(wq->qpid << rdev_p->qpshift);
-	PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __FUNCTION__,
+	PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
 	     wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
 	return 0;
 err4:
@@ -345,7 +345,7 @@
 {
 	struct t3_cqe cqe;
 
-	PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__,
+	PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
 	     wq, cq, cq->sw_rptr, cq->sw_wptr);
 	memset(&cqe, 0, sizeof(cqe));
 	cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
@@ -363,10 +363,10 @@
 {
 	u32 ptr;
 
-	PDBG("%s wq %p cq %p\n", __FUNCTION__, wq, cq);
+	PDBG("%s wq %p cq %p\n", __func__, wq, cq);
 
 	/* flush RQ */
-	PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __FUNCTION__,
+	PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
 	    wq->rq_rptr, wq->rq_wptr, count);
 	ptr = wq->rq_rptr + count;
 	while (ptr++ != wq->rq_wptr)
@@ -378,7 +378,7 @@
 {
 	struct t3_cqe cqe;
 
-	PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__,
+	PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
 	     wq, cq, cq->sw_rptr, cq->sw_wptr);
 	memset(&cqe, 0, sizeof(cqe));
 	cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
@@ -415,11 +415,11 @@
 {
 	struct t3_cqe *cqe, *swcqe;
 
-	PDBG("%s cq %p cqid 0x%x\n", __FUNCTION__, cq, cq->cqid);
+	PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
 	cqe = cxio_next_hw_cqe(cq);
 	while (cqe) {
 		PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
-		     __FUNCTION__, cq->rptr, cq->sw_wptr);
+		     __func__, cq->rptr, cq->sw_wptr);
 		swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
 		*swcqe = *cqe;
 		swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
@@ -461,7 +461,7 @@
 			(*count)++;
 		ptr++;
 	}
-	PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count);
+	PDBG("%s cq %p count %d\n", __func__, cq, *count);
 }
 
 void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
@@ -470,7 +470,7 @@
 	u32 ptr;
 
 	*count = 0;
-	PDBG("%s count zero %d\n", __FUNCTION__, *count);
+	PDBG("%s count zero %d\n", __func__, *count);
 	ptr = cq->sw_rptr;
 	while (!Q_EMPTY(ptr, cq->sw_wptr)) {
 		cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
@@ -479,7 +479,7 @@
 			(*count)++;
 		ptr++;
 	}
-	PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count);
+	PDBG("%s cq %p count %d\n", __func__, cq, *count);
 }
 
 static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
@@ -506,12 +506,12 @@
 
 	skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
 	if (!skb) {
-		PDBG("%s alloc_skb failed\n", __FUNCTION__);
+		PDBG("%s alloc_skb failed\n", __func__);
 		return -ENOMEM;
 	}
 	err = cxio_hal_init_ctrl_cq(rdev_p);
 	if (err) {
-		PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err);
+		PDBG("%s err %d initializing ctrl_cq\n", __func__, err);
 		goto err;
 	}
 	rdev_p->ctrl_qp.workq = dma_alloc_coherent(
@@ -521,7 +521,7 @@
 					&(rdev_p->ctrl_qp.dma_addr),
 					GFP_KERNEL);
 	if (!rdev_p->ctrl_qp.workq) {
-		PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__);
+		PDBG("%s dma_alloc_coherent failed\n", __func__);
 		err = -ENOMEM;
 		goto err;
 	}
@@ -591,25 +591,25 @@
 	addr &= 0x7FFFFFF;
 	nr_wqe = len % 96 ? len / 96 + 1 : len / 96;	/* 96B max per WQE */
 	PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
-	     __FUNCTION__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
+	     __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
 	     nr_wqe, data, addr);
 	utx_len = 3;		/* in 32B unit */
 	for (i = 0; i < nr_wqe; i++) {
 		if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
 		           T3_CTRL_QP_SIZE_LOG2)) {
 			PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
-			     "wait for more space i %d\n", __FUNCTION__,
+			     "wait for more space i %d\n", __func__,
 			     rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
 			if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
 					     !Q_FULL(rdev_p->ctrl_qp.rptr,
 						     rdev_p->ctrl_qp.wptr,
 						     T3_CTRL_QP_SIZE_LOG2))) {
 				PDBG("%s ctrl_qp workq interrupted\n",
-				     __FUNCTION__);
+				     __func__);
 				return -ERESTARTSYS;
 			}
 			PDBG("%s ctrl_qp wakeup, continue posting work request "
-			     "i %d\n", __FUNCTION__, i);
+			     "i %d\n", __func__, i);
 		}
 		wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
 						(1 << T3_CTRL_QP_SIZE_LOG2)));
@@ -630,7 +630,7 @@
 		if ((i != 0) &&
 		    (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
 			flag = T3_COMPLETION_FLAG;
-			PDBG("%s force completion at i %d\n", __FUNCTION__, i);
+			PDBG("%s force completion at i %d\n", __func__, i);
 		}
 
 		/* build the utx mem command */
@@ -701,7 +701,7 @@
 		*stag = (stag_idx << 8) | ((*stag) & 0xFF);
 	}
 	PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
-	     __FUNCTION__, stag_state, type, pdid, stag_idx);
+	     __func__, stag_state, type, pdid, stag_idx);
 
 	if (reset_tpt_entry)
 		cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
@@ -718,7 +718,7 @@
 	if (pbl) {
 
 		PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
-		     __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base,
+		     __func__, *pbl_addr, rdev_p->rnic_info.pbl_base,
 		     *pbl_size);
 		err = cxio_hal_ctrl_qp_write_mem(rdev_p,
 				(*pbl_addr >> 5),
@@ -814,7 +814,7 @@
 	struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
-	PDBG("%s rdev_p %p\n", __FUNCTION__, rdev_p);
+	PDBG("%s rdev_p %p\n", __func__, rdev_p);
 	wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
 	wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
 	wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
@@ -856,7 +856,7 @@
 	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
 	PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
 	     " se %0x notify %0x cqbranch %0x creditth %0x\n",
-	     cnt, __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
+	     cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
 	     RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
 	     RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
 	     RSPQ_CREDIT_THRESH(rsp_msg));
@@ -868,7 +868,7 @@
 	     CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
 	rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
 	if (!rdev_p) {
-		PDBG("%s called by t3cdev %p with null ulp\n", __FUNCTION__,
+		PDBG("%s called by t3cdev %p with null ulp\n", __func__,
 		     t3cdev_p);
 		return 0;
 	}
@@ -908,13 +908,13 @@
 		strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
 			T3_MAX_DEV_NAME_LEN);
 	} else {
-		PDBG("%s t3cdev_p or dev_name must be set\n", __FUNCTION__);
+		PDBG("%s t3cdev_p or dev_name must be set\n", __func__);
 		return -EINVAL;
 	}
 
 	list_add_tail(&rdev_p->entry, &rdev_list);
 
-	PDBG("%s opening rnic dev %s\n", __FUNCTION__, rdev_p->dev_name);
+	PDBG("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
 	memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
 	if (!rdev_p->t3cdev_p)
 		rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
@@ -923,14 +923,14 @@
 					 &(rdev_p->rnic_info));
 	if (err) {
 		printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
-		     __FUNCTION__, rdev_p->t3cdev_p, err);
+		     __func__, rdev_p->t3cdev_p, err);
 		goto err1;
 	}
 	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
 				    &(rdev_p->port_info));
 	if (err) {
 		printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
-		     __FUNCTION__, rdev_p->t3cdev_p, err);
+		     __func__, rdev_p->t3cdev_p, err);
 		goto err1;
 	}
 
@@ -947,7 +947,7 @@
 	rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
 	PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
 	     "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
-	     __FUNCTION__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+	     __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
 	     rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
 	     rdev_p->rnic_info.pbl_base,
 	     rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
@@ -961,7 +961,7 @@
 	err = cxio_hal_init_ctrl_qp(rdev_p);
 	if (err) {
 		printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
-		       __FUNCTION__, err);
+		       __func__, err);
 		goto err1;
 	}
 	err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
@@ -969,19 +969,19 @@
 				     T3_MAX_NUM_PD);
 	if (err) {
 		printk(KERN_ERR "%s error %d initializing hal resources.\n",
-		       __FUNCTION__, err);
+		       __func__, err);
 		goto err2;
 	}
 	err = cxio_hal_pblpool_create(rdev_p);
 	if (err) {
 		printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
-		       __FUNCTION__, err);
+		       __func__, err);
 		goto err3;
 	}
 	err = cxio_hal_rqtpool_create(rdev_p);
 	if (err) {
 		printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
-		       __FUNCTION__, err);
+		       __func__, err);
 		goto err4;
 	}
 	return 0;
@@ -1043,7 +1043,7 @@
 			 * Insert this completed cqe into the swcq.
 			 */
 			PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
-			     __FUNCTION__, Q_PTR2IDX(ptr,  wq->sq_size_log2),
+			     __func__, Q_PTR2IDX(ptr,  wq->sq_size_log2),
 			     Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
 			sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
 			*(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
@@ -1112,7 +1112,7 @@
 
 	PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
 	     " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
-	     __FUNCTION__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
+	     __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
 	     CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
 	     CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
 	     CQE_WRID_LOW(*hw_cqe));
@@ -1215,7 +1215,7 @@
 		struct t3_swsq *sqp;
 
 		PDBG("%s out of order completion going in swsq at idx %ld\n",
-		     __FUNCTION__,
+		     __func__,
 		     Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
 		sqp = wq->sq +
 		      Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
@@ -1234,13 +1234,13 @@
 	 */
 	if (SQ_TYPE(*hw_cqe)) {
 		wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
-		PDBG("%s completing sq idx %ld\n", __FUNCTION__,
+		PDBG("%s completing sq idx %ld\n", __func__,
 		     Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
 		*cookie = (wq->sq +
 			   Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
 		wq->sq_rptr++;
 	} else {
-		PDBG("%s completing rq idx %ld\n", __FUNCTION__,
+		PDBG("%s completing rq idx %ld\n", __func__,
 		     Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
 		*cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
 		wq->rq_rptr++;
@@ -1255,11 +1255,11 @@
 skip_cqe:
 	if (SW_CQE(*hw_cqe)) {
 		PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
-		     __FUNCTION__, cq, cq->cqid, cq->sw_rptr);
+		     __func__, cq, cq->cqid, cq->sw_rptr);
 		++cq->sw_rptr;
 	} else {
 		PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
-		     __FUNCTION__, cq, cq->cqid, cq->rptr);
+		     __func__, cq, cq->cqid, cq->rptr);
 		++cq->rptr;
 
 		/*
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index d3095ae..45ed4f2 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -206,13 +206,13 @@
 u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 {
 	u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
-	PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+	PDBG("%s qpid 0x%x\n", __func__, qpid);
 	return qpid;
 }
 
 void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
 {
-	PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
+	PDBG("%s qpid 0x%x\n", __func__, qpid);
 	cxio_hal_put_resource(rscp->qpid_fifo, qpid);
 }
 
@@ -255,13 +255,13 @@
 u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
 {
 	unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
-	PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size);
+	PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
 	return (u32)addr;
 }
 
 void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
 {
-	PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size);
+	PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
 	gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
 }
 
@@ -292,13 +292,13 @@
 u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
 {
 	unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
-	PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size << 6);
+	PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
 	return (u32)addr;
 }
 
 void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
 {
-	PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size << 6);
+	PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
 	gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
 }
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 0315c9d..6ba4138 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -65,7 +65,7 @@
 
 static void rnic_init(struct iwch_dev *rnicp)
 {
-	PDBG("%s iwch_dev %p\n", __FUNCTION__,  rnicp);
+	PDBG("%s iwch_dev %p\n", __func__,  rnicp);
 	idr_init(&rnicp->cqidr);
 	idr_init(&rnicp->qpidr);
 	idr_init(&rnicp->mmidr);
@@ -106,7 +106,7 @@
 	struct iwch_dev *rnicp;
 	static int vers_printed;
 
-	PDBG("%s t3cdev %p\n", __FUNCTION__,  tdev);
+	PDBG("%s t3cdev %p\n", __func__,  tdev);
 	if (!vers_printed++)
 		printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
 		       DRV_VERSION);
@@ -144,7 +144,7 @@
 static void close_rnic_dev(struct t3cdev *tdev)
 {
 	struct iwch_dev *dev, *tmp;
-	PDBG("%s t3cdev %p\n", __FUNCTION__,  tdev);
+	PDBG("%s t3cdev %p\n", __func__,  tdev);
 	mutex_lock(&dev_mutex);
 	list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
 		if (dev->rdev.t3cdev_p == tdev) {
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index caf4e60..9ad9b1e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -147,7 +147,7 @@
 				void *handle, u32 id)
 {
 	int ret;
-	u32 newid;
+	int newid;
 
 	do {
 		if (!idr_pre_get(idr, GFP_KERNEL)) {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 99f2f2a..72ca360 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -110,9 +110,9 @@
 
 static void start_ep_timer(struct iwch_ep *ep)
 {
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	if (timer_pending(&ep->timer)) {
-		PDBG("%s stopped / restarted timer ep %p\n", __FUNCTION__, ep);
+		PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
 		del_timer_sync(&ep->timer);
 	} else
 		get_ep(&ep->com);
@@ -124,7 +124,7 @@
 
 static void stop_ep_timer(struct iwch_ep *ep)
 {
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	del_timer_sync(&ep->timer);
 	put_ep(&ep->com);
 }
@@ -190,7 +190,7 @@
 
 static void set_emss(struct iwch_ep *ep, u16 opt)
 {
-	PDBG("%s ep %p opt %u\n", __FUNCTION__, ep, opt);
+	PDBG("%s ep %p opt %u\n", __func__, ep, opt);
 	ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
 	if (G_TCPOPT_TSTAMP(opt))
 		ep->emss -= 12;
@@ -220,7 +220,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&epc->lock, flags);
-	PDBG("%s - %s -> %s\n", __FUNCTION__, states[epc->state], states[new]);
+	PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
 	__state_set(epc, new);
 	spin_unlock_irqrestore(&epc->lock, flags);
 	return;
@@ -236,7 +236,7 @@
 		spin_lock_init(&epc->lock);
 		init_waitqueue_head(&epc->waitq);
 	}
-	PDBG("%s alloc ep %p\n", __FUNCTION__, epc);
+	PDBG("%s alloc ep %p\n", __func__, epc);
 	return epc;
 }
 
@@ -244,13 +244,13 @@
 {
 	struct iwch_ep_common *epc;
 	epc = container_of(kref, struct iwch_ep_common, kref);
-	PDBG("%s ep %p state %s\n", __FUNCTION__, epc, states[state_read(epc)]);
+	PDBG("%s ep %p state %s\n", __func__, epc, states[state_read(epc)]);
 	kfree(epc);
 }
 
 static void release_ep_resources(struct iwch_ep *ep)
 {
-	PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+	PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 	cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
 	dst_release(ep->dst);
 	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
@@ -349,7 +349,7 @@
 
 static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
 {
-	PDBG("%s t3cdev %p\n", __FUNCTION__, dev);
+	PDBG("%s t3cdev %p\n", __func__, dev);
 	kfree_skb(skb);
 }
 
@@ -370,7 +370,7 @@
 {
 	struct cpl_abort_req *req = cplhdr(skb);
 
-	PDBG("%s t3cdev %p\n", __FUNCTION__, dev);
+	PDBG("%s t3cdev %p\n", __func__, dev);
 	req->cmd = CPL_ABORT_NO_RST;
 	cxgb3_ofld_send(dev, skb);
 }
@@ -380,10 +380,10 @@
 	struct cpl_close_con_req *req;
 	struct sk_buff *skb;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	skb = get_skb(NULL, sizeof(*req), gfp);
 	if (!skb) {
-		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
 		return -ENOMEM;
 	}
 	skb->priority = CPL_PRIORITY_DATA;
@@ -400,11 +400,11 @@
 {
 	struct cpl_abort_req *req;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	skb = get_skb(skb, sizeof(*req), gfp);
 	if (!skb) {
 		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
-		       __FUNCTION__);
+		       __func__);
 		return -ENOMEM;
 	}
 	skb->priority = CPL_PRIORITY_DATA;
@@ -426,12 +426,12 @@
 	unsigned int mtu_idx;
 	int wscale;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 
 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 	if (!skb) {
 		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
-		       __FUNCTION__);
+		       __func__);
 		return -ENOMEM;
 	}
 	mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
@@ -470,7 +470,7 @@
 	struct mpa_message *mpa;
 	int len;
 
-	PDBG("%s ep %p pd_len %d\n", __FUNCTION__, ep, ep->plen);
+	PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
 
 	BUG_ON(skb_cloned(skb));
 
@@ -530,13 +530,13 @@
 	struct mpa_message *mpa;
 	struct sk_buff *skb;
 
-	PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen);
+	PDBG("%s ep %p plen %d\n", __func__, ep, plen);
 
 	mpalen = sizeof(*mpa) + plen;
 
 	skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
 	if (!skb) {
-		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
 		return -ENOMEM;
 	}
 	skb_reserve(skb, sizeof(*req));
@@ -580,13 +580,13 @@
 	int len;
 	struct sk_buff *skb;
 
-	PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen);
+	PDBG("%s ep %p plen %d\n", __func__, ep, plen);
 
 	mpalen = sizeof(*mpa) + plen;
 
 	skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
 	if (!skb) {
-		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
 		return -ENOMEM;
 	}
 	skb->priority = CPL_PRIORITY_DATA;
@@ -630,7 +630,7 @@
 	struct cpl_act_establish *req = cplhdr(skb);
 	unsigned int tid = GET_TID(req);
 
-	PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, tid);
+	PDBG("%s ep %p tid %d\n", __func__, ep, tid);
 
 	dst_confirm(ep->dst);
 
@@ -663,7 +663,7 @@
 {
 	struct iw_cm_event event;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CLOSE;
 	if (ep->com.cm_id) {
@@ -680,7 +680,7 @@
 {
 	struct iw_cm_event event;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_DISCONNECT;
 	if (ep->com.cm_id) {
@@ -694,7 +694,7 @@
 {
 	struct iw_cm_event event;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CLOSE;
 	event.status = -ECONNRESET;
@@ -712,7 +712,7 @@
 {
 	struct iw_cm_event event;
 
-	PDBG("%s ep %p status %d\n", __FUNCTION__, ep, status);
+	PDBG("%s ep %p status %d\n", __func__, ep, status);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CONNECT_REPLY;
 	event.status = status;
@@ -724,7 +724,7 @@
 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	}
 	if (ep->com.cm_id) {
-		PDBG("%s ep %p tid %d status %d\n", __FUNCTION__, ep,
+		PDBG("%s ep %p tid %d status %d\n", __func__, ep,
 		     ep->hwtid, status);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 	}
@@ -739,7 +739,7 @@
 {
 	struct iw_cm_event event;
 
-	PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+	PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
 	event.local_addr = ep->com.local_addr;
@@ -759,11 +759,11 @@
 {
 	struct iw_cm_event event;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_ESTABLISHED;
 	if (ep->com.cm_id) {
-		PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
+		PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 	}
 }
@@ -773,7 +773,7 @@
 	struct cpl_rx_data_ack *req;
 	struct sk_buff *skb;
 
-	PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
+	PDBG("%s ep %p credits %u\n", __func__, ep, credits);
 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 	if (!skb) {
 		printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
@@ -797,7 +797,7 @@
 	enum iwch_qp_attr_mask mask;
 	int err;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 
 	/*
 	 * Stop mpa timer.  If it expired, then the state has
@@ -884,7 +884,7 @@
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 	ep->mpa_attr.version = mpa_rev;
 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
-	     "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__,
+	     "xmit_marker_enabled=%d, version=%d\n", __func__,
 	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
 	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
 
@@ -915,7 +915,7 @@
 	struct mpa_message *mpa;
 	u16 plen;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 
 	/*
 	 * Stop mpa timer.  If it expired, then the state has
@@ -935,7 +935,7 @@
 		return;
 	}
 
-	PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__);
+	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
 
 	/*
 	 * Copy the new data into our accumulation buffer.
@@ -950,7 +950,7 @@
 	 */
 	if (ep->mpa_pkt_len < sizeof(*mpa))
 		return;
-	PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__);
+	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
 	mpa = (struct mpa_message *) ep->mpa_pkt;
 
 	/*
@@ -1000,7 +1000,7 @@
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 	ep->mpa_attr.version = mpa_rev;
 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
-	     "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__,
+	     "xmit_marker_enabled=%d, version=%d\n", __func__,
 	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
 	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
 
@@ -1017,7 +1017,7 @@
 	struct cpl_rx_data *hdr = cplhdr(skb);
 	unsigned int dlen = ntohs(hdr->len);
 
-	PDBG("%s ep %p dlen %u\n", __FUNCTION__, ep, dlen);
+	PDBG("%s ep %p dlen %u\n", __func__, ep, dlen);
 
 	skb_pull(skb, sizeof(*hdr));
 	skb_trim(skb, dlen);
@@ -1037,7 +1037,7 @@
 	default:
 		printk(KERN_ERR MOD "%s Unexpected streaming data."
 		       " ep %p state %d tid %d\n",
-		       __FUNCTION__, ep, state_read(&ep->com), ep->hwtid);
+		       __func__, ep, state_read(&ep->com), ep->hwtid);
 
 		/*
 		 * The ep will timeout and inform the ULP of the failure.
@@ -1063,7 +1063,7 @@
 	struct cpl_wr_ack *hdr = cplhdr(skb);
 	unsigned int credits = ntohs(hdr->credits);
 
-	PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
+	PDBG("%s ep %p credits %u\n", __func__, ep, credits);
 
 	if (credits == 0)
 		return CPL_RET_BUF_DONE;
@@ -1084,7 +1084,7 @@
 {
 	struct iwch_ep *ep = ctx;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 
 	/*
 	 * We get 2 abort replies from the HW.  The first one must
@@ -1115,7 +1115,7 @@
 	struct iwch_ep *ep = ctx;
 	struct cpl_act_open_rpl *rpl = cplhdr(skb);
 
-	PDBG("%s ep %p status %u errno %d\n", __FUNCTION__, ep, rpl->status,
+	PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
 	     status2errno(rpl->status));
 	connect_reply_upcall(ep, status2errno(rpl->status));
 	state_set(&ep->com, DEAD);
@@ -1133,7 +1133,7 @@
 	struct sk_buff *skb;
 	struct cpl_pass_open_req *req;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 	if (!skb) {
 		printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
@@ -1162,7 +1162,7 @@
 	struct iwch_listen_ep *ep = ctx;
 	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
 
-	PDBG("%s ep %p status %d error %d\n", __FUNCTION__, ep,
+	PDBG("%s ep %p status %d error %d\n", __func__, ep,
 	     rpl->status, status2errno(rpl->status));
 	ep->com.rpl_err = status2errno(rpl->status);
 	ep->com.rpl_done = 1;
@@ -1176,10 +1176,10 @@
 	struct sk_buff *skb;
 	struct cpl_close_listserv_req *req;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 	if (!skb) {
-		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
 		return -ENOMEM;
 	}
 	req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
@@ -1197,7 +1197,7 @@
 	struct iwch_listen_ep *ep = ctx;
 	struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	ep->com.rpl_err = status2errno(rpl->status);
 	ep->com.rpl_done = 1;
 	wake_up(&ep->com.waitq);
@@ -1211,7 +1211,7 @@
 	u32 opt0h, opt0l, opt2;
 	int wscale;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	BUG_ON(skb_cloned(skb));
 	skb_trim(skb, sizeof(*rpl));
 	skb_get(skb);
@@ -1244,7 +1244,7 @@
 static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
 		      struct sk_buff *skb)
 {
-	PDBG("%s t3cdev %p tid %u peer_ip %x\n", __FUNCTION__, tdev, hwtid,
+	PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
 	     peer_ip);
 	BUG_ON(skb_cloned(skb));
 	skb_trim(skb, sizeof(struct cpl_tid_release));
@@ -1279,11 +1279,11 @@
 	struct rtable *rt;
 	struct iff_mac tim;
 
-	PDBG("%s parent ep %p tid %u\n", __FUNCTION__, parent_ep, hwtid);
+	PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
 
 	if (state_read(&parent_ep->com) != LISTEN) {
 		printk(KERN_ERR "%s - listening ep not in LISTEN\n",
-		       __FUNCTION__);
+		       __func__);
 		goto reject;
 	}
 
@@ -1295,7 +1295,7 @@
 	if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
 		printk(KERN_ERR
 			"%s bad dst mac %02x %02x %02x %02x %02x %02x\n",
-			__FUNCTION__,
+			__func__,
 			req->dst_mac[0],
 			req->dst_mac[1],
 			req->dst_mac[2],
@@ -1313,21 +1313,21 @@
 			req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
 	if (!rt) {
 		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
-		       __FUNCTION__);
+		       __func__);
 		goto reject;
 	}
 	dst = &rt->u.dst;
 	l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
 	if (!l2t) {
 		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
-		       __FUNCTION__);
+		       __func__);
 		dst_release(dst);
 		goto reject;
 	}
 	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
 	if (!child_ep) {
 		printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
-		       __FUNCTION__);
+		       __func__);
 		l2t_release(L2DATA(tdev), l2t);
 		dst_release(dst);
 		goto reject;
@@ -1362,7 +1362,7 @@
 	struct iwch_ep *ep = ctx;
 	struct cpl_pass_establish *req = cplhdr(skb);
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	ep->snd_seq = ntohl(req->snd_isn);
 	ep->rcv_seq = ntohl(req->rcv_isn);
 
@@ -1383,7 +1383,7 @@
 	int disconnect = 1;
 	int release = 0;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	dst_confirm(ep->dst);
 
 	spin_lock_irqsave(&ep->com.lock, flags);
@@ -1473,7 +1473,7 @@
 	int state;
 
 	if (is_neg_adv_abort(req->status)) {
-		PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
+		PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
 		     ep->hwtid);
 		t3_l2t_send_event(ep->com.tdev, ep->l2t);
 		return CPL_RET_BUF_DONE;
@@ -1489,7 +1489,7 @@
 	}
 
 	state = state_read(&ep->com);
-	PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state);
+	PDBG("%s ep %p state %u\n", __func__, ep, state);
 	switch (state) {
 	case CONNECTING:
 		break;
@@ -1528,14 +1528,14 @@
 			if (ret)
 				printk(KERN_ERR MOD
 				       "%s - qp <- error failed!\n",
-				       __FUNCTION__);
+				       __func__);
 		}
 		peer_abort_upcall(ep);
 		break;
 	case ABORTING:
 		break;
 	case DEAD:
-		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __FUNCTION__);
+		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
 		return CPL_RET_BUF_DONE;
 	default:
 		BUG_ON(1);
@@ -1546,7 +1546,7 @@
 	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
 	if (!rpl_skb) {
 		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
-		       __FUNCTION__);
+		       __func__);
 		dst_release(ep->dst);
 		l2t_release(L2DATA(ep->com.tdev), ep->l2t);
 		put_ep(&ep->com);
@@ -1573,7 +1573,7 @@
 	unsigned long flags;
 	int release = 0;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	BUG_ON(!ep);
 
 	/* The cm_id may be null if we failed to connect */
@@ -1624,9 +1624,9 @@
 {
 	struct iwch_ep *ep = ctx;
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	skb_pull(skb, sizeof(struct cpl_rdma_terminate));
-	PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
+	PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
 	skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
 				  skb->len);
 	ep->com.qp->attr.terminate_msg_len = skb->len;
@@ -1639,13 +1639,13 @@
 	struct cpl_rdma_ec_status *rep = cplhdr(skb);
 	struct iwch_ep *ep = ctx;
 
-	PDBG("%s ep %p tid %u status %d\n", __FUNCTION__, ep, ep->hwtid,
+	PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
 	     rep->status);
 	if (rep->status) {
 		struct iwch_qp_attributes attrs;
 
 		printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
-		       __FUNCTION__, ep->hwtid);
+		       __func__, ep->hwtid);
 		stop_ep_timer(ep);
 		attrs.next_state = IWCH_QP_STATE_ERROR;
 		iwch_modify_qp(ep->com.qp->rhp,
@@ -1663,7 +1663,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&ep->com.lock, flags);
-	PDBG("%s ep %p tid %u state %d\n", __FUNCTION__, ep, ep->hwtid,
+	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
 	     ep->com.state);
 	switch (ep->com.state) {
 	case MPA_REQ_SENT:
@@ -1693,7 +1693,7 @@
 {
 	int err;
 	struct iwch_ep *ep = to_ep(cm_id);
-	PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
+	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 
 	if (state_read(&ep->com) == DEAD) {
 		put_ep(&ep->com);
@@ -1718,7 +1718,7 @@
 	struct iwch_dev *h = to_iwch_dev(cm_id->device);
 	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
 
-	PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
+	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	if (state_read(&ep->com) == DEAD)
 		return -ECONNRESET;
 
@@ -1739,7 +1739,7 @@
 	ep->com.rpl_err = 0;
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
-	PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord);
+	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
 
 	get_ep(&ep->com);
 
@@ -1810,7 +1810,7 @@
 
 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
 	if (!ep) {
-		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
 		err = -ENOMEM;
 		goto out;
 	}
@@ -1827,7 +1827,7 @@
 	ep->com.cm_id = cm_id;
 	ep->com.qp = get_qhp(h, conn_param->qpn);
 	BUG_ON(!ep->com.qp);
-	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __FUNCTION__, conn_param->qpn,
+	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
 	     ep->com.qp, cm_id);
 
 	/*
@@ -1835,7 +1835,7 @@
 	 */
 	ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
 	if (ep->atid == -1) {
-		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
 		err = -ENOMEM;
 		goto fail2;
 	}
@@ -1847,7 +1847,7 @@
 			cm_id->local_addr.sin_port,
 			cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
 	if (!rt) {
-		printk(KERN_ERR MOD "%s - cannot find route.\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
 		err = -EHOSTUNREACH;
 		goto fail3;
 	}
@@ -1857,7 +1857,7 @@
 	ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
 			     ep->dst->neighbour->dev);
 	if (!ep->l2t) {
-		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
 		err = -ENOMEM;
 		goto fail4;
 	}
@@ -1894,11 +1894,11 @@
 
 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
 	if (!ep) {
-		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
 		err = -ENOMEM;
 		goto fail1;
 	}
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 	ep->com.tdev = h->rdev.t3cdev_p;
 	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
@@ -1910,7 +1910,7 @@
 	 */
 	ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
 	if (ep->stid == -1) {
-		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__);
+		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
 		err = -ENOMEM;
 		goto fail2;
 	}
@@ -1942,7 +1942,7 @@
 	int err;
 	struct iwch_listen_ep *ep = to_listen_ep(cm_id);
 
-	PDBG("%s ep %p\n", __FUNCTION__, ep);
+	PDBG("%s ep %p\n", __func__, ep);
 
 	might_sleep();
 	state_set(&ep->com, DEAD);
@@ -1965,11 +1965,11 @@
 
 	spin_lock_irqsave(&ep->com.lock, flags);
 
-	PDBG("%s ep %p state %s, abrupt %d\n", __FUNCTION__, ep,
+	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
 	     states[ep->com.state], abrupt);
 
 	if (ep->com.state == DEAD) {
-		PDBG("%s already dead ep %p\n", __FUNCTION__, ep);
+		PDBG("%s already dead ep %p\n", __func__, ep);
 		goto out;
 	}
 
@@ -2020,7 +2020,7 @@
 	if (ep->dst != old)
 		return 0;
 
-	PDBG("%s ep %p redirect to dst %p l2t %p\n", __FUNCTION__, ep, new,
+	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
 	     l2t);
 	dst_hold(new);
 	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 6107e7c..2bb7fbd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -54,13 +54,13 @@
 #define MPA_FLAGS_MASK		0xE0
 
 #define put_ep(ep) { \
-	PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__,  \
+	PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__,  \
 	     ep, atomic_read(&((ep)->kref.refcount))); \
 	kref_put(&((ep)->kref), __free_ep); \
 }
 
 #define get_ep(ep) { \
-	PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \
+	PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
 	     ep, atomic_read(&((ep)->kref.refcount))); \
 	kref_get(&((ep)->kref));  \
 }
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index d7624c1..4ee8ccd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -67,7 +67,7 @@
 	ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
 				   &credit);
 	if (t3a_device(chp->rhp) && credit) {
-		PDBG("%s updating %d cq credits on id %d\n", __FUNCTION__,
+		PDBG("%s updating %d cq credits on id %d\n", __func__,
 		     credit, chp->cq.cqid);
 		cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
 	}
@@ -83,7 +83,7 @@
 	wc->vendor_err = CQE_STATUS(cqe);
 
 	PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
-	     "lo 0x%x cookie 0x%llx\n", __FUNCTION__,
+	     "lo 0x%x cookie 0x%llx\n", __func__,
 	     CQE_QPID(cqe), CQE_TYPE(cqe),
 	     CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
 	     CQE_WRID_LOW(cqe), (unsigned long long) cookie);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index b406766..7b67a67 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -52,7 +52,7 @@
 
 	if (!qhp) {
 		printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
-		       __FUNCTION__, CQE_STATUS(rsp_msg->cqe),
+		       __func__, CQE_STATUS(rsp_msg->cqe),
 		       CQE_QPID(rsp_msg->cqe));
 		spin_unlock(&rnicp->lock);
 		return;
@@ -61,14 +61,14 @@
 	if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
 	    (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
 		PDBG("%s AE received after RTS - "
-		     "qp state %d qpid 0x%x status 0x%x\n", __FUNCTION__,
+		     "qp state %d qpid 0x%x status 0x%x\n", __func__,
 		     qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
 		spin_unlock(&rnicp->lock);
 		return;
 	}
 
 	printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
-	       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+	       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
 	       CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
 	       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
 	       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
@@ -132,10 +132,10 @@
 	    (CQE_STATUS(rsp_msg->cqe) == 0)) {
 		if (SQ_TYPE(rsp_msg->cqe)) {
 			PDBG("%s QPID 0x%x ep %p disconnecting\n",
-			     __FUNCTION__, qhp->wq.qpid, qhp->ep);
+			     __func__, qhp->wq.qpid, qhp->ep);
 			iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
 		} else {
-			PDBG("%s post REQ_ERR AE QPID 0x%x\n", __FUNCTION__,
+			PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__,
 			     qhp->wq.qpid);
 			post_qp_event(rnicp, chp, rsp_msg,
 				      IB_EVENT_QP_REQ_ERR, 0);
@@ -180,7 +180,7 @@
 	case TPT_ERR_INVALIDATE_SHARED_MR:
 	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
 		printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
-		       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+		       "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
 		       CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
 		       CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
 		       CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index b8797c6..58c3d61 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -62,7 +62,7 @@
 	mmid = stag >> 8;
 	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
 	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
-	PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp);
+	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
 	return 0;
 }
 
@@ -96,7 +96,7 @@
 	mmid = stag >> 8;
 	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
 	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
-	PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp);
+	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
 	return 0;
 }
 
@@ -163,7 +163,7 @@
 			    ((u64) j << *shift));
 
 	PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
-	     __FUNCTION__, (unsigned long long) *iova_start,
+	     __func__, (unsigned long long) *iova_start,
 	     (unsigned long long) mask, *shift, (unsigned long long) *total_size,
 	     *npages);
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index b2ea921..ca72654 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -101,7 +101,7 @@
 	struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
 	struct iwch_mm_entry *mm, *tmp;
 
-	PDBG("%s context %p\n", __FUNCTION__, context);
+	PDBG("%s context %p\n", __func__, context);
 	list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
 		kfree(mm);
 	cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
@@ -115,7 +115,7 @@
 	struct iwch_ucontext *context;
 	struct iwch_dev *rhp = to_iwch_dev(ibdev);
 
-	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	PDBG("%s ibdev %p\n", __func__, ibdev);
 	context = kzalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
 		return ERR_PTR(-ENOMEM);
@@ -129,7 +129,7 @@
 {
 	struct iwch_cq *chp;
 
-	PDBG("%s ib_cq %p\n", __FUNCTION__, ib_cq);
+	PDBG("%s ib_cq %p\n", __func__, ib_cq);
 	chp = to_iwch_cq(ib_cq);
 
 	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
@@ -151,7 +151,7 @@
 	struct iwch_create_cq_req ureq;
 	struct iwch_ucontext *ucontext = NULL;
 
-	PDBG("%s ib_dev %p entries %d\n", __FUNCTION__, ibdev, entries);
+	PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
 	rhp = to_iwch_dev(ibdev);
 	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
 	if (!chp)
@@ -233,7 +233,7 @@
 	struct t3_cq oldcq, newcq;
 	int ret;
 
-	PDBG("%s ib_cq %p cqe %d\n", __FUNCTION__, cq, cqe);
+	PDBG("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
 
 	/* We don't downsize... */
 	if (cqe <= cq->cqe)
@@ -281,7 +281,7 @@
 	ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
 	if (ret) {
 		printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
-			__FUNCTION__, ret);
+			__func__, ret);
 	}
 
 	/* add user hooks here */
@@ -316,7 +316,7 @@
 		chp->cq.rptr = rptr;
 	} else
 		spin_lock_irqsave(&chp->lock, flag);
-	PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr);
+	PDBG("%s rptr 0x%x\n", __func__, chp->cq.rptr);
 	err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
 	spin_unlock_irqrestore(&chp->lock, flag);
 	if (err < 0)
@@ -337,7 +337,7 @@
 	struct iwch_ucontext *ucontext;
 	u64 addr;
 
-	PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __FUNCTION__, vma->vm_pgoff,
+	PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
 	     key, len);
 
 	if (vma->vm_start & (PAGE_SIZE-1)) {
@@ -390,7 +390,7 @@
 
 	php = to_iwch_pd(pd);
 	rhp = php->rhp;
-	PDBG("%s ibpd %p pdid 0x%x\n", __FUNCTION__, pd, php->pdid);
+	PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
 	cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
 	kfree(php);
 	return 0;
@@ -404,7 +404,7 @@
 	u32 pdid;
 	struct iwch_dev *rhp;
 
-	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	PDBG("%s ibdev %p\n", __func__, ibdev);
 	rhp = (struct iwch_dev *) ibdev;
 	pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
 	if (!pdid)
@@ -422,7 +422,7 @@
 			return ERR_PTR(-EFAULT);
 		}
 	}
-	PDBG("%s pdid 0x%0x ptr 0x%p\n", __FUNCTION__, pdid, php);
+	PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
 	return &php->ibpd;
 }
 
@@ -432,7 +432,7 @@
 	struct iwch_mr *mhp;
 	u32 mmid;
 
-	PDBG("%s ib_mr %p\n", __FUNCTION__, ib_mr);
+	PDBG("%s ib_mr %p\n", __func__, ib_mr);
 	/* There can be no memory windows */
 	if (atomic_read(&ib_mr->usecnt))
 		return -EINVAL;
@@ -447,7 +447,7 @@
 		kfree((void *) (unsigned long) mhp->kva);
 	if (mhp->umem)
 		ib_umem_release(mhp->umem);
-	PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
+	PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
 	kfree(mhp);
 	return 0;
 }
@@ -467,7 +467,7 @@
 	struct iwch_mr *mhp;
 	int ret;
 
-	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+	PDBG("%s ib_pd %p\n", __func__, pd);
 	php = to_iwch_pd(pd);
 	rhp = php->rhp;
 
@@ -531,7 +531,7 @@
 	int npages;
 	int ret;
 
-	PDBG("%s ib_mr %p ib_pd %p\n", __FUNCTION__, mr, pd);
+	PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
 
 	/* There can be no memory windows */
 	if (atomic_read(&mr->usecnt))
@@ -594,7 +594,7 @@
 	struct iwch_mr *mhp;
 	struct iwch_reg_user_mr_resp uresp;
 
-	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+	PDBG("%s ib_pd %p\n", __func__, pd);
 
 	php = to_iwch_pd(pd);
 	rhp = php->rhp;
@@ -649,7 +649,7 @@
 	if (udata && !t3a_device(rhp)) {
 		uresp.pbl_addr = (mhp->attr.pbl_addr -
 	                         rhp->rdev.rnic_info.pbl_base) >> 3;
-		PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__,
+		PDBG("%s user resp pbl_addr 0x%x\n", __func__,
 		     uresp.pbl_addr);
 
 		if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
@@ -673,7 +673,7 @@
 	u64 kva;
 	struct ib_mr *ibmr;
 
-	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+	PDBG("%s ib_pd %p\n", __func__, pd);
 
 	/*
 	 * T3 only supports 32 bits of size.
@@ -710,7 +710,7 @@
 	mhp->attr.stag = stag;
 	mmid = (stag) >> 8;
 	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
-	PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __FUNCTION__, mmid, mhp, stag);
+	PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
 	return &(mhp->ibmw);
 }
 
@@ -726,7 +726,7 @@
 	cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
 	remove_handle(rhp, &rhp->mmidr, mmid);
 	kfree(mhp);
-	PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __FUNCTION__, mw, mmid, mhp);
+	PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
 	return 0;
 }
 
@@ -754,7 +754,7 @@
 	cxio_destroy_qp(&rhp->rdev, &qhp->wq,
 			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
 
-	PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __FUNCTION__,
+	PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
 	     ib_qp, qhp->wq.qpid, qhp);
 	kfree(qhp);
 	return 0;
@@ -773,7 +773,7 @@
 	int wqsize, sqsize, rqsize;
 	struct iwch_ucontext *ucontext;
 
-	PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
+	PDBG("%s ib_pd %p\n", __func__, pd);
 	if (attrs->qp_type != IB_QPT_RC)
 		return ERR_PTR(-EINVAL);
 	php = to_iwch_pd(pd);
@@ -805,7 +805,7 @@
 	 */
 	sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
 	wqsize = roundup_pow_of_two(rqsize + sqsize);
-	PDBG("%s wqsize %d sqsize %d rqsize %d\n", __FUNCTION__,
+	PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
 	     wqsize, sqsize, rqsize);
 	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
 	if (!qhp)
@@ -898,7 +898,7 @@
 	init_timer(&(qhp->timer));
 	PDBG("%s sq_num_entries %d, rq_num_entries %d "
 	     "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n",
-	     __FUNCTION__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+	     __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
 	     qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
 	     1 << qhp->wq.size_log2);
 	return &qhp->ibqp;
@@ -912,7 +912,7 @@
 	enum iwch_qp_attr_mask mask = 0;
 	struct iwch_qp_attributes attrs;
 
-	PDBG("%s ib_qp %p\n", __FUNCTION__, ibqp);
+	PDBG("%s ib_qp %p\n", __func__, ibqp);
 
 	/* iwarp does not support the RTR state */
 	if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
@@ -945,20 +945,20 @@
 
 void iwch_qp_add_ref(struct ib_qp *qp)
 {
-	PDBG("%s ib_qp %p\n", __FUNCTION__, qp);
+	PDBG("%s ib_qp %p\n", __func__, qp);
 	atomic_inc(&(to_iwch_qp(qp)->refcnt));
 }
 
 void iwch_qp_rem_ref(struct ib_qp *qp)
 {
-	PDBG("%s ib_qp %p\n", __FUNCTION__, qp);
+	PDBG("%s ib_qp %p\n", __func__, qp);
 	if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
 	        wake_up(&(to_iwch_qp(qp)->wait));
 }
 
 static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
 {
-	PDBG("%s ib_dev %p qpn 0x%x\n", __FUNCTION__, dev, qpn);
+	PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
 	return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
 }
 
@@ -966,7 +966,7 @@
 static int iwch_query_pkey(struct ib_device *ibdev,
 			   u8 port, u16 index, u16 * pkey)
 {
-	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	PDBG("%s ibdev %p\n", __func__, ibdev);
 	*pkey = 0;
 	return 0;
 }
@@ -977,7 +977,7 @@
 	struct iwch_dev *dev;
 
 	PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
-	       __FUNCTION__, ibdev, port, index, gid);
+	       __func__, ibdev, port, index, gid);
 	dev = to_iwch_dev(ibdev);
 	BUG_ON(port == 0 || port > 2);
 	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
@@ -990,7 +990,7 @@
 {
 
 	struct iwch_dev *dev;
-	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	PDBG("%s ibdev %p\n", __func__, ibdev);
 
 	dev = to_iwch_dev(ibdev);
 	memset(props, 0, sizeof *props);
@@ -1017,7 +1017,7 @@
 static int iwch_query_port(struct ib_device *ibdev,
 			   u8 port, struct ib_port_attr *props)
 {
-	PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
+	PDBG("%s ibdev %p\n", __func__, ibdev);
 	props->max_mtu = IB_MTU_4096;
 	props->lid = 0;
 	props->lmc = 0;
@@ -1045,7 +1045,7 @@
 {
 	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
 					    ibdev.class_dev);
-	PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+	PDBG("%s class dev 0x%p\n", __func__, cdev);
 	return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type);
 }
 
@@ -1056,7 +1056,7 @@
 	struct ethtool_drvinfo info;
 	struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
 
-	PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+	PDBG("%s class dev 0x%p\n", __func__, cdev);
 	rtnl_lock();
 	lldev->ethtool_ops->get_drvinfo(lldev, &info);
 	rtnl_unlock();
@@ -1070,7 +1070,7 @@
 	struct ethtool_drvinfo info;
 	struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
 
-	PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
+	PDBG("%s class dev 0x%p\n", __func__, cdev);
 	rtnl_lock();
 	lldev->ethtool_ops->get_drvinfo(lldev, &info);
 	rtnl_unlock();
@@ -1081,7 +1081,7 @@
 {
 	struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
 					    ibdev.class_dev);
-	PDBG("%s class dev 0x%p\n", __FUNCTION__, dev);
+	PDBG("%s class dev 0x%p\n", __func__, dev);
 	return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor,
 		                       dev->rdev.rnic_info.pdev->device);
 }
@@ -1103,14 +1103,13 @@
 	int ret;
 	int i;
 
-	PDBG("%s iwch_dev %p\n", __FUNCTION__, dev);
+	PDBG("%s iwch_dev %p\n", __func__, dev);
 	strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
 	memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
 	memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
 	dev->ibdev.owner = THIS_MODULE;
 	dev->device_cap_flags =
-	    (IB_DEVICE_ZERO_STAG |
-	     IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+	    (IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW);
 
 	dev->ibdev.uverbs_cmd_mask =
 	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1207,7 +1206,7 @@
 {
 	int i;
 
-	PDBG("%s iwch_dev %p\n", __FUNCTION__, dev);
+	PDBG("%s iwch_dev %p\n", __func__, dev);
 	for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
 		class_device_remove_file(&dev->ibdev.class_dev,
 					 iwch_class_attributes[i]);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 48833f3..61356f9 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -213,7 +213,7 @@
 		if (mm->key == key && mm->len == len) {
 			list_del_init(&mm->entry);
 			spin_unlock(&ucontext->mmap_lock);
-			PDBG("%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__,
+			PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
 			     key, (unsigned long long) mm->addr, mm->len);
 			return mm;
 		}
@@ -226,7 +226,7 @@
 			       struct iwch_mm_entry *mm)
 {
 	spin_lock(&ucontext->mmap_lock);
-	PDBG("%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__,
+	PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
 	     mm->key, (unsigned long long) mm->addr, mm->len);
 	list_add_tail(&mm->entry, &ucontext->mmaps);
 	spin_unlock(&ucontext->mmap_lock);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index ea2cdd7..8891c3b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -72,7 +72,7 @@
 	wqe->send.reserved[2] = 0;
 	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
 		plen = 4;
-		wqe->send.sgl[0].stag = wr->imm_data;
+		wqe->send.sgl[0].stag = wr->ex.imm_data;
 		wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
 		wqe->send.num_sgle = __constant_cpu_to_be32(0);
 		*flit_cnt = 5;
@@ -112,7 +112,7 @@
 
 	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
 		plen = 4;
-		wqe->write.sgl[0].stag = wr->imm_data;
+		wqe->write.sgl[0].stag = wr->ex.imm_data;
 		wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
 		wqe->write.num_sgle = __constant_cpu_to_be32(0);
 		*flit_cnt = 6;
@@ -168,30 +168,30 @@
 
 		mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
 		if (!mhp) {
-			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			PDBG("%s %d\n", __func__, __LINE__);
 			return -EIO;
 		}
 		if (!mhp->attr.state) {
-			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			PDBG("%s %d\n", __func__, __LINE__);
 			return -EIO;
 		}
 		if (mhp->attr.zbva) {
-			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			PDBG("%s %d\n", __func__, __LINE__);
 			return -EIO;
 		}
 
 		if (sg_list[i].addr < mhp->attr.va_fbo) {
-			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			PDBG("%s %d\n", __func__, __LINE__);
 			return -EINVAL;
 		}
 		if (sg_list[i].addr + ((u64) sg_list[i].length) <
 		    sg_list[i].addr) {
-			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			PDBG("%s %d\n", __func__, __LINE__);
 			return -EINVAL;
 		}
 		if (sg_list[i].addr + ((u64) sg_list[i].length) >
 		    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
-			PDBG("%s %d\n", __FUNCTION__, __LINE__);
+			PDBG("%s %d\n", __func__, __LINE__);
 			return -EINVAL;
 		}
 		offset = sg_list[i].addr - mhp->attr.va_fbo;
@@ -290,7 +290,7 @@
 				qhp->wq.oldest_read = sqp;
 			break;
 		default:
-			PDBG("%s post of type=%d TBD!\n", __FUNCTION__,
+			PDBG("%s post of type=%d TBD!\n", __func__,
 			     wr->opcode);
 			err = -EINVAL;
 		}
@@ -309,7 +309,7 @@
 			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
 			       0, t3_wr_flit_cnt);
 		PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
-		     __FUNCTION__, (unsigned long long) wr->wr_id, idx,
+		     __func__, (unsigned long long) wr->wr_id, idx,
 		     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
 		     sqp->opcode);
 		wr = wr->next;
@@ -361,7 +361,7 @@
 			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
 			       0, sizeof(struct t3_receive_wr) >> 3);
 		PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
-		     "wqe %p \n", __FUNCTION__, (unsigned long long) wr->wr_id,
+		     "wqe %p \n", __func__, (unsigned long long) wr->wr_id,
 		     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
 		++(qhp->wq.rq_wptr);
 		++(qhp->wq.wptr);
@@ -407,7 +407,7 @@
 		return -ENOMEM;
 	}
 	idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
-	PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __FUNCTION__, idx,
+	PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
 	     mw, mw_bind);
 	wqe = (union t3_wr *) (qhp->wq.queue + idx);
 
@@ -595,10 +595,10 @@
 	struct terminate_message *term;
 	struct sk_buff *skb;
 
-	PDBG("%s %d\n", __FUNCTION__, __LINE__);
+	PDBG("%s %d\n", __func__, __LINE__);
 	skb = alloc_skb(40, GFP_ATOMIC);
 	if (!skb) {
-		printk(KERN_ERR "%s cannot send TERMINATE!\n", __FUNCTION__);
+		printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__);
 		return -ENOMEM;
 	}
 	wqe = (union t3_wr *)skb_put(skb, 40);
@@ -629,7 +629,7 @@
 	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
 	schp = get_chp(qhp->rhp, qhp->attr.scq);
 
-	PDBG("%s qhp %p rchp %p schp %p\n", __FUNCTION__, qhp, rchp, schp);
+	PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
 	/* take a ref on the qhp since we must release the lock */
 	atomic_inc(&qhp->refcnt);
 	spin_unlock_irqrestore(&qhp->lock, *flag);
@@ -720,11 +720,11 @@
 	init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
 	init_attr.irs = qhp->ep->rcv_seq;
 	PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
-	     "flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
+	     "flags 0x%x qpcaps 0x%x\n", __func__,
 	     init_attr.rq_addr, init_attr.rq_size,
 	     init_attr.flags, init_attr.qpcaps);
 	ret = cxio_rdma_init(&rhp->rdev, &init_attr);
-	PDBG("%s ret %d\n", __FUNCTION__, ret);
+	PDBG("%s ret %d\n", __func__, ret);
 	return ret;
 }
 
@@ -742,7 +742,7 @@
 	int free = 0;
 	struct iwch_ep *ep = NULL;
 
-	PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __FUNCTION__,
+	PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
 	     qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
 	     (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
 
@@ -899,14 +899,14 @@
 		break;
 	default:
 		printk(KERN_ERR "%s in a bad state %d\n",
-		       __FUNCTION__, qhp->attr.state);
+		       __func__, qhp->attr.state);
 		ret = -EINVAL;
 		goto err;
 		break;
 	}
 	goto out;
 err:
-	PDBG("%s disassociating ep %p qpid 0x%x\n", __FUNCTION__, qhp->ep,
+	PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
 	     qhp->wq.qpid);
 
 	/* disassociate the LLP connection */
@@ -939,7 +939,7 @@
 	if (free)
 		put_ep(&ep->com);
 
-	PDBG("%s exit state %d\n", __FUNCTION__, qhp->attr.state);
+	PDBG("%s exit state %d\n", __func__, qhp->attr.state);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
index 194c1c3..56735ea 100644
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -41,9 +41,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-
-#include <asm/current.h>
-
 #include "ehca_tools.h"
 #include "ehca_iverbs.h"
 #include "hcp_if.h"
@@ -170,17 +167,8 @@
 {
 	struct ehca_av *av;
 	struct ehca_ud_av new_ehca_av;
-	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
 	struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
 					      ib_device);
-	u32 cur_pid = current->tgid;
-
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
-	}
 
 	memset(&new_ehca_av, 0, sizeof(new_ehca_av));
 	new_ehca_av.sl = ah_attr->sl;
@@ -242,15 +230,6 @@
 int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
 {
 	struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
-	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
-	u32 cur_pid = current->tgid;
-
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
-	}
 
 	memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
 	       sizeof(ah_attr->grh.dgid));
@@ -273,16 +252,6 @@
 
 int ehca_destroy_ah(struct ib_ah *ah)
 {
-	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
-	u32 cur_pid = current->tgid;
-
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
-	}
-
 	kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
 
 	return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 92cce8a..0d13fe0 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -132,7 +132,6 @@
 struct ehca_pd {
 	struct ib_pd ib_pd;
 	struct ipz_pd fw_pd;
-	u32 ownpid;
 	/* small queue mgmt */
 	struct mutex lock;
 	struct list_head free[2];
@@ -215,7 +214,6 @@
 	atomic_t nr_events; /* #events seen */
 	wait_queue_head_t wait_completion;
 	spinlock_t task_lock;
-	u32 ownpid;
 	/* mmap counter for resources mapped into user space */
 	u32 mm_count_queue;
 	u32 mm_count_galpa;
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 0467c15..ec0cfcf 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -43,8 +43,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <asm/current.h>
-
 #include "ehca_iverbs.h"
 #include "ehca_classes.h"
 #include "ehca_irq.h"
@@ -148,7 +146,6 @@
 	spin_lock_init(&my_cq->task_lock);
 	atomic_set(&my_cq->nr_events, 0);
 	init_waitqueue_head(&my_cq->wait_completion);
-	my_cq->ownpid = current->tgid;
 
 	cq = &my_cq->ib_cq;
 
@@ -320,7 +317,6 @@
 	struct ehca_shca *shca = container_of(device, struct ehca_shca,
 					      ib_device);
 	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
-	u32 cur_pid = current->tgid;
 	unsigned long flags;
 
 	if (cq->uobject) {
@@ -329,12 +325,6 @@
 				 "user space cq_num=%x", my_cq->cq_number);
 			return -EINVAL;
 		}
-		if (my_cq->ownpid != cur_pid) {
-			ehca_err(device, "Invalid caller pid=%x ownpid=%x "
-				 "cq_num=%x",
-				 cur_pid, my_cq->ownpid, my_cq->cq_number);
-			return -EINVAL;
-		}
 	}
 
 	/*
@@ -374,15 +364,6 @@
 
 int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
 {
-	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	u32 cur_pid = current->tgid;
-
-	if (cq->uobject && my_cq->ownpid != cur_pid) {
-		ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_cq->ownpid);
-		return -EINVAL;
-	}
-
 	/* TODO: proper resize needs to be done */
 	ehca_err(cq->device, "not implemented yet");
 
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 5bd7b59..2515cbd 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -43,6 +43,11 @@
 #include "ehca_iverbs.h"
 #include "hcp_if.h"
 
+static unsigned int limit_uint(unsigned int value)
+{
+	return min_t(unsigned int, value, INT_MAX);
+}
+
 int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
 {
 	int i, ret = 0;
@@ -83,37 +88,40 @@
 	props->vendor_id       = rblock->vendor_id >> 8;
 	props->vendor_part_id  = rblock->vendor_part_id >> 16;
 	props->hw_ver          = rblock->hw_ver;
-	props->max_qp          = min_t(unsigned, rblock->max_qp, INT_MAX);
-	props->max_qp_wr       = min_t(unsigned, rblock->max_wqes_wq, INT_MAX);
-	props->max_sge         = min_t(unsigned, rblock->max_sge, INT_MAX);
-	props->max_sge_rd      = min_t(unsigned, rblock->max_sge_rd, INT_MAX);
-	props->max_cq          = min_t(unsigned, rblock->max_cq, INT_MAX);
-	props->max_cqe         = min_t(unsigned, rblock->max_cqe, INT_MAX);
-	props->max_mr          = min_t(unsigned, rblock->max_mr, INT_MAX);
-	props->max_mw          = min_t(unsigned, rblock->max_mw, INT_MAX);
-	props->max_pd          = min_t(unsigned, rblock->max_pd, INT_MAX);
-	props->max_ah          = min_t(unsigned, rblock->max_ah, INT_MAX);
-	props->max_fmr         = min_t(unsigned, rblock->max_mr, INT_MAX);
+	props->max_qp          = limit_uint(rblock->max_qp);
+	props->max_qp_wr       = limit_uint(rblock->max_wqes_wq);
+	props->max_sge         = limit_uint(rblock->max_sge);
+	props->max_sge_rd      = limit_uint(rblock->max_sge_rd);
+	props->max_cq          = limit_uint(rblock->max_cq);
+	props->max_cqe         = limit_uint(rblock->max_cqe);
+	props->max_mr          = limit_uint(rblock->max_mr);
+	props->max_mw          = limit_uint(rblock->max_mw);
+	props->max_pd          = limit_uint(rblock->max_pd);
+	props->max_ah          = limit_uint(rblock->max_ah);
+	props->max_ee          = limit_uint(rblock->max_rd_ee_context);
+	props->max_rdd         = limit_uint(rblock->max_rd_domain);
+	props->max_fmr         = limit_uint(rblock->max_mr);
+	props->local_ca_ack_delay  = limit_uint(rblock->local_ca_ack_delay);
+	props->max_qp_rd_atom  = limit_uint(rblock->max_rr_qp);
+	props->max_ee_rd_atom  = limit_uint(rblock->max_rr_ee_context);
+	props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
+	props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp);
+	props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context);
 
 	if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
-		props->max_srq         = props->max_qp;
-		props->max_srq_wr      = props->max_qp_wr;
+		props->max_srq         = limit_uint(props->max_qp);
+		props->max_srq_wr      = limit_uint(props->max_qp_wr);
 		props->max_srq_sge     = 3;
 	}
 
-	props->max_pkeys       = 16;
-	props->local_ca_ack_delay
-		= rblock->local_ca_ack_delay;
-	props->max_raw_ipv6_qp
-		= min_t(unsigned, rblock->max_raw_ipv6_qp, INT_MAX);
-	props->max_raw_ethy_qp
-		= min_t(unsigned, rblock->max_raw_ethy_qp, INT_MAX);
-	props->max_mcast_grp
-		= min_t(unsigned, rblock->max_mcast_grp, INT_MAX);
-	props->max_mcast_qp_attach
-		= min_t(unsigned, rblock->max_mcast_qp_attach, INT_MAX);
+	props->max_pkeys           = 16;
+	props->local_ca_ack_delay  = limit_uint(rblock->local_ca_ack_delay);
+	props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
+	props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
+	props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
+	props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach);
 	props->max_total_mcast_qp_attach
-		= min_t(unsigned, rblock->max_total_mcast_qp_attach, INT_MAX);
+		= limit_uint(rblock->max_total_mcast_qp_attach);
 
 	/* translate device capabilities */
 	props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
@@ -128,6 +136,46 @@
 	return ret;
 }
 
+static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
+{
+	switch (fw_mtu) {
+	case 0x1:
+		return IB_MTU_256;
+	case 0x2:
+		return IB_MTU_512;
+	case 0x3:
+		return IB_MTU_1024;
+	case 0x4:
+		return IB_MTU_2048;
+	case 0x5:
+		return IB_MTU_4096;
+	default:
+		ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
+			 fw_mtu);
+		return 0;
+	}
+}
+
+static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
+{
+	switch (vl_cap) {
+	case 0x1:
+		return 1;
+	case 0x2:
+		return 2;
+	case 0x3:
+		return 4;
+	case 0x4:
+		return 8;
+	case 0x5:
+		return 15;
+	default:
+		ehca_err(&shca->ib_device, "invalid Vl Capability: %x.",
+			 vl_cap);
+		return 0;
+	}
+}
+
 int ehca_query_port(struct ib_device *ibdev,
 		    u8 port, struct ib_port_attr *props)
 {
@@ -152,31 +200,13 @@
 
 	memset(props, 0, sizeof(struct ib_port_attr));
 
-	switch (rblock->max_mtu) {
-	case 0x1:
-		props->active_mtu = props->max_mtu = IB_MTU_256;
-		break;
-	case 0x2:
-		props->active_mtu = props->max_mtu = IB_MTU_512;
-		break;
-	case 0x3:
-		props->active_mtu = props->max_mtu = IB_MTU_1024;
-		break;
-	case 0x4:
-		props->active_mtu = props->max_mtu = IB_MTU_2048;
-		break;
-	case 0x5:
-		props->active_mtu = props->max_mtu = IB_MTU_4096;
-		break;
-	default:
-		ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
-			 rblock->max_mtu);
-		break;
-	}
-
+	props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu);
 	props->port_cap_flags  = rblock->capability_mask;
 	props->gid_tbl_len     = rblock->gid_tbl_len;
-	props->max_msg_sz      = rblock->max_msg_sz;
+	if (rblock->max_msg_sz)
+		props->max_msg_sz      = rblock->max_msg_sz;
+	else
+		props->max_msg_sz      = 0x1 << 31;
 	props->bad_pkey_cntr   = rblock->bad_pkey_cntr;
 	props->qkey_viol_cntr  = rblock->qkey_viol_cntr;
 	props->pkey_tbl_len    = rblock->pkey_tbl_len;
@@ -186,6 +216,7 @@
 	props->sm_sl           = rblock->sm_sl;
 	props->subnet_timeout  = rblock->subnet_timeout;
 	props->init_type_reply = rblock->init_type_reply;
+	props->max_vl_num      = map_number_of_vls(shca, rblock->vl_cap);
 
 	if (rblock->state && rblock->phys_width) {
 		props->phys_state      = rblock->phys_pstate;
@@ -314,7 +345,7 @@
 	return ret;
 }
 
-const u32 allowed_port_caps = (
+static const u32 allowed_port_caps = (
 	IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
 	IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
 	IB_PORT_VENDOR_CLASS_SUP);
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index a86ebcc..65b3362 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -57,16 +57,17 @@
 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
 MODULE_VERSION(HCAD_VERSION);
 
-int ehca_open_aqp1     = 0;
+static int ehca_open_aqp1     = 0;
+static int ehca_hw_level      = 0;
+static int ehca_poll_all_eqs  = 1;
+static int ehca_mr_largepage  = 1;
+
 int ehca_debug_level   = 0;
-int ehca_hw_level      = 0;
 int ehca_nr_ports      = 2;
 int ehca_use_hp_mr     = 0;
 int ehca_port_act_time = 30;
-int ehca_poll_all_eqs  = 1;
 int ehca_static_rate   = -1;
 int ehca_scaling_code  = 0;
-int ehca_mr_largepage  = 1;
 int ehca_lock_hcalls   = -1;
 
 module_param_named(open_aqp1,     ehca_open_aqp1,     int, S_IRUGO);
@@ -396,7 +397,7 @@
 	return ret;
 }
 
-int ehca_init_device(struct ehca_shca *shca)
+static int ehca_init_device(struct ehca_shca *shca)
 {
 	int ret;
 
@@ -579,8 +580,8 @@
 	return 1;
 }
 
-DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
-	    ehca_show_debug_level, ehca_store_debug_level);
+static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
+		   ehca_show_debug_level, ehca_store_debug_level);
 
 static struct attribute *ehca_drv_attrs[] = {
 	&driver_attr_debug_level.attr,
@@ -941,7 +942,7 @@
 	spin_unlock(&shca_list_lock);
 }
 
-int __init ehca_module_init(void)
+static int __init ehca_module_init(void)
 {
 	int ret;
 
@@ -988,7 +989,7 @@
 	return ret;
 };
 
-void __exit ehca_module_exit(void)
+static void __exit ehca_module_exit(void)
 {
 	if (ehca_poll_all_eqs == 1)
 		del_timer_sync(&poll_eqs_timer);
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index e239bbf..f26997f 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -40,8 +40,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <asm/current.h>
-
 #include <rdma/ib_umem.h>
 
 #include "ehca_iverbs.h"
@@ -419,7 +417,6 @@
 	struct ehca_shca *shca =
 		container_of(mr->device, struct ehca_shca, ib_device);
 	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-	struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
 	u64 new_size;
 	u64 *new_start;
 	u32 new_acl;
@@ -429,15 +426,6 @@
 	u32 num_kpages = 0;
 	u32 num_hwpages = 0;
 	struct ehca_mr_pginfo pginfo;
-	u32 cur_pid = current->tgid;
-
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    (my_pd->ownpid != cur_pid)) {
-		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		ret = -EINVAL;
-		goto rereg_phys_mr_exit0;
-	}
 
 	if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
 		/* TODO not supported, because PHYP rereg hCall needs pages */
@@ -577,19 +565,9 @@
 	struct ehca_shca *shca =
 		container_of(mr->device, struct ehca_shca, ib_device);
 	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-	struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
-	u32 cur_pid = current->tgid;
 	unsigned long sl_flags;
 	struct ehca_mr_hipzout_parms hipzout;
 
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    (my_pd->ownpid != cur_pid)) {
-		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		ret = -EINVAL;
-		goto query_mr_exit0;
-	}
-
 	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
 		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
 			 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
@@ -634,16 +612,6 @@
 	struct ehca_shca *shca =
 		container_of(mr->device, struct ehca_shca, ib_device);
 	struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
-	struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
-	u32 cur_pid = current->tgid;
-
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    (my_pd->ownpid != cur_pid)) {
-		ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		ret = -EINVAL;
-		goto dereg_mr_exit0;
-	}
 
 	if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
 		ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
@@ -1952,9 +1920,8 @@
 	return ret;
 }
 
-int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
-			  u32 number,
-			  u64 *kpage)
+static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
+				 u32 number, u64 *kpage)
 {
 	int ret = 0;
 	struct ib_phys_buf *pbuf;
@@ -2012,9 +1979,8 @@
 	return ret;
 }
 
-int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
-			 u32 number,
-			 u64 *kpage)
+static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
+				u32 number, u64 *kpage)
 {
 	int ret = 0;
 	u64 *fmrlist;
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
index 43bcf08..2fe5548 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -38,8 +38,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <asm/current.h>
-
 #include "ehca_tools.h"
 #include "ehca_iverbs.h"
 
@@ -58,7 +56,6 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
-	pd->ownpid = current->tgid;
 	for (i = 0; i < 2; i++) {
 		INIT_LIST_HEAD(&pd->free[i]);
 		INIT_LIST_HEAD(&pd->full[i]);
@@ -85,18 +82,10 @@
 
 int ehca_dealloc_pd(struct ib_pd *pd)
 {
-	u32 cur_pid = current->tgid;
 	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
 	int i, leftovers = 0;
 	struct ipz_small_queue_page *page, *tmp;
 
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
-	}
-
 	for (i = 0; i < 2; i++) {
 		list_splice(&my_pd->full[i], &my_pd->free[i]);
 		list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 1012f15..3eb14a5 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -43,9 +43,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-
-#include <asm/current.h>
-
 #include "ehca_classes.h"
 #include "ehca_tools.h"
 #include "ehca_qes.h"
@@ -424,6 +421,9 @@
 	u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
 	unsigned long flags;
 
+	if (init_attr->create_flags)
+		return ERR_PTR(-EINVAL);
+
 	memset(&parms, 0, sizeof(parms));
 	qp_type = init_attr->qp_type;
 
@@ -1526,16 +1526,6 @@
 	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
 					      ib_device);
 	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
-					     ib_pd);
-	u32 cur_pid = current->tgid;
-
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
-	}
 
 	/* The if-block below caches qp_attr to be modified for GSI and SMI
 	 * qps during the initialization by ib_mad. When the respective port
@@ -1636,23 +1626,13 @@
 		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
 {
 	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
-					     ib_pd);
 	struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
 					      ib_device);
 	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
 	struct hcp_modify_qp_control_block *qpcb;
-	u32 cur_pid = current->tgid;
 	int cnt, ret = 0;
 	u64 h_ret;
 
-	if (my_pd->ib_pd.uobject  && my_pd->ib_pd.uobject->context  &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
-	}
-
 	if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
 		ehca_err(qp->device, "Invalid attribute mask "
 			 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
@@ -1797,8 +1777,6 @@
 {
 	struct ehca_qp *my_qp =
 		container_of(ibsrq, struct ehca_qp, ib_srq);
-	struct ehca_pd *my_pd =
-		container_of(ibsrq->pd, struct ehca_pd, ib_pd);
 	struct ehca_shca *shca =
 		container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
 	struct hcp_modify_qp_control_block *mqpcb;
@@ -1806,14 +1784,6 @@
 	u64 h_ret;
 	int ret = 0;
 
-	u32 cur_pid = current->tgid;
-	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(ibsrq->pd->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
-	}
-
 	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!mqpcb) {
 		ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
@@ -1864,22 +1834,13 @@
 int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
 {
 	struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
-	struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd);
 	struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
 					      ib_device);
 	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
 	struct hcp_modify_qp_control_block *qpcb;
-	u32 cur_pid = current->tgid;
 	int ret = 0;
 	u64 h_ret;
 
-	if (my_pd->ib_pd.uobject  && my_pd->ib_pd.uobject->context  &&
-	    my_pd->ownpid != cur_pid) {
-		ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x",
-			 cur_pid, my_pd->ownpid);
-		return -EINVAL;
-	}
-
 	qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!qpcb) {
 		ehca_err(srq->device, "Out of memory for qpcb "
@@ -1919,7 +1880,6 @@
 	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
 					     ib_pd);
 	struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
-	u32 cur_pid = current->tgid;
 	u32 qp_num = my_qp->real_qp_num;
 	int ret;
 	u64 h_ret;
@@ -1934,11 +1894,6 @@
 				 "user space qp_num=%x", qp_num);
 			return -EINVAL;
 		}
-		if (my_pd->ownpid != cur_pid) {
-			ehca_err(dev, "Invalid caller pid=%x ownpid=%x",
-				 cur_pid, my_pd->ownpid);
-			return -EINVAL;
-		}
 	}
 
 	if (my_qp->send_cq) {
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 2ce8cff..a20bbf4 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -188,7 +188,7 @@
 	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
 	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
 		/* this might not work as long as HW does not support it */
-		wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
+		wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
 		wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
 	}
 
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index 4a8346a..ec950bf 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -73,37 +73,37 @@
 		if (unlikely(ehca_debug_level)) \
 			dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
 				   "PU%04x EHCA_DBG:%s " format "\n", \
-				   raw_smp_processor_id(), __FUNCTION__, \
+				   raw_smp_processor_id(), __func__, \
 				   ## arg); \
 	} while (0)
 
 #define ehca_info(ib_dev, format, arg...) \
 	dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
-		 raw_smp_processor_id(), __FUNCTION__, ## arg)
+		 raw_smp_processor_id(), __func__, ## arg)
 
 #define ehca_warn(ib_dev, format, arg...) \
 	dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
-		 raw_smp_processor_id(), __FUNCTION__, ## arg)
+		 raw_smp_processor_id(), __func__, ## arg)
 
 #define ehca_err(ib_dev, format, arg...) \
 	dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
-		raw_smp_processor_id(), __FUNCTION__, ## arg)
+		raw_smp_processor_id(), __func__, ## arg)
 
 /* use this one only if no ib_dev available */
 #define ehca_gen_dbg(format, arg...) \
 	do { \
 		if (unlikely(ehca_debug_level)) \
 			printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
-			       raw_smp_processor_id(), __FUNCTION__, ## arg); \
+			       raw_smp_processor_id(), __func__, ## arg); \
 	} while (0)
 
 #define ehca_gen_warn(format, arg...) \
 	printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
-	       raw_smp_processor_id(), __FUNCTION__, ## arg)
+	       raw_smp_processor_id(), __func__, ## arg)
 
 #define ehca_gen_err(format, arg...) \
 	printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
-	       raw_smp_processor_id(), __FUNCTION__, ## arg)
+	       raw_smp_processor_id(), __func__, ## arg)
 
 /**
  * ehca_dmp - printk a memory block, whose length is n*8 bytes.
@@ -118,7 +118,7 @@
 		for (x = 0; x < l; x += 16) { \
 			printk(KERN_INFO "EHCA_DMP:%s " format \
 			       " adr=%p ofs=%04x %016lx %016lx\n", \
-			       __FUNCTION__, ##args, deb, x, \
+			       __func__, ##args, deb, x, \
 			       *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
 			deb += 16; \
 		} \
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 5234d6c..1b07f2b 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -40,8 +40,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <asm/current.h>
-
 #include "ehca_classes.h"
 #include "ehca_iverbs.h"
 #include "ehca_mrmw.h"
@@ -253,11 +251,9 @@
 	u32 idr_handle = fileoffset & 0x1FFFFFF;
 	u32 q_type = (fileoffset >> 27) & 0x1;	  /* CQ, QP,...        */
 	u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
-	u32 cur_pid = current->tgid;
 	u32 ret;
 	struct ehca_cq *cq;
 	struct ehca_qp *qp;
-	struct ehca_pd *pd;
 	struct ib_uobject *uobject;
 
 	switch (q_type) {
@@ -270,13 +266,6 @@
 		if (!cq)
 			return -EINVAL;
 
-		if (cq->ownpid != cur_pid) {
-			ehca_err(cq->ib_cq.device,
-				 "Invalid caller pid=%x ownpid=%x",
-				 cur_pid, cq->ownpid);
-			return -ENOMEM;
-		}
-
 		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
 			return -EINVAL;
 
@@ -298,14 +287,6 @@
 		if (!qp)
 			return -EINVAL;
 
-		pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
-		if (pd->ownpid != cur_pid) {
-			ehca_err(qp->ib_qp.device,
-				 "Invalid caller pid=%x ownpid=%x",
-				 cur_pid, pd->ownpid);
-			return -ENOMEM;
-		}
-
 		uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
 		if (!uobject || uobject->context != context)
 			return -EINVAL;
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index fe67388..75a6c91 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -20,17 +20,20 @@
 	ipath_qp.o \
 	ipath_rc.o \
 	ipath_ruc.o \
+	ipath_sdma.o \
 	ipath_srq.o \
 	ipath_stats.o \
 	ipath_sysfs.o \
 	ipath_uc.o \
 	ipath_ud.o \
 	ipath_user_pages.o \
+	ipath_user_sdma.o \
 	ipath_verbs_mcast.o \
 	ipath_verbs.o
 
 ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
 ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o
+ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba7220.o ipath_sd7220.o ipath_sd7220_img.o
 
 ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
 ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_7220.h b/drivers/infiniband/hw/ipath/ipath_7220.h
new file mode 100644
index 0000000..74fa5cc
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_7220.h
@@ -0,0 +1,57 @@
+#ifndef _IPATH_7220_H
+#define _IPATH_7220_H
+/*
+ * Copyright (c) 2007 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This header file provides the declarations and common definitions
+ * for (mostly) manipulation of the SerDes blocks within the IBA7220.
+ * the functions declared should only be called from within other
+ * 7220-related files such as ipath_iba7220.c or ipath_sd7220.c.
+ */
+int ipath_sd7220_presets(struct ipath_devdata *dd);
+int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset);
+int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum, u8 *img,
+	int len, int offset);
+int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum, const u8 *img,
+	int len, int offset);
+/*
+ * Below used for sdnum parameter, selecting one of the two sections
+ * used for PCIe, or the single SerDes used for IB, which is the
+ * only one currently used
+ */
+#define IB_7220_SERDES 2
+
+int ipath_sd7220_ib_load(struct ipath_devdata *dd);
+int ipath_sd7220_ib_vfy(struct ipath_devdata *dd);
+
+#endif /* _IPATH_7220_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 591901a..28cfe97 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -80,6 +80,8 @@
 #define IPATH_IB_LINKDOWN_DISABLE	5
 #define IPATH_IB_LINK_LOOPBACK	6 /* enable local loopback */
 #define IPATH_IB_LINK_EXTERNAL	7 /* normal, disable local loopback */
+#define IPATH_IB_LINK_NO_HRTBT	8 /* disable Heartbeat, e.g. for loopback */
+#define IPATH_IB_LINK_HRTBT	9 /* enable heartbeat, normal, non-loopback */
 
 /*
  * These 3 values (SDR and DDR may be ORed for auto-speed
@@ -198,7 +200,8 @@
 #define IPATH_RUNTIME_FORCE_WC_ORDER	0x4
 #define IPATH_RUNTIME_RCVHDR_COPY	0x8
 #define IPATH_RUNTIME_MASTER	0x10
-/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */
+#define IPATH_RUNTIME_NODMA_RTAIL 0x80
+#define IPATH_RUNTIME_SDMA	      0x200
 #define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
 #define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
 
@@ -444,8 +447,9 @@
 #define IPATH_CMD_PIOAVAILUPD	27	/* force an update of PIOAvail reg */
 #define IPATH_CMD_POLL_TYPE	28	/* set the kind of polling we want */
 #define IPATH_CMD_ARMLAUNCH_CTRL	29 /* armlaunch detection control */
-
-#define IPATH_CMD_MAX		29
+/* 30 is unused */
+#define IPATH_CMD_SDMA_INFLIGHT 31	/* sdma inflight counter request */
+#define IPATH_CMD_SDMA_COMPLETE 32	/* sdma completion counter request */
 
 /*
  * Poll types
@@ -483,6 +487,17 @@
 	union {
 		struct ipath_tid_info tid_info;
 		struct ipath_user_info user_info;
+
+		/*
+		 * address in userspace where we should put the sdma
+		 * inflight counter
+		 */
+		__u64 sdma_inflight;
+		/*
+		 * address in userspace where we should put the sdma
+		 * completion counter
+		 */
+		__u64 sdma_complete;
 		/* address in userspace of struct ipath_port_info to
 		   write result to */
 		__u64 port_info;
@@ -537,7 +552,7 @@
 
 /* The second diag_pkt struct is the expanded version that allows
  * more control over the packet, specifically, by allowing a custom
- * pbc (+ extra) qword, so that special modes and deliberate
+ * pbc (+ static rate) qword, so that special modes and deliberate
  * changes to CRCs can be used. The elements were also re-ordered
  * for better alignment and to avoid padding issues.
  */
@@ -662,8 +677,12 @@
 #define INFINIPATH_RHF_LENGTH_SHIFT 0
 #define INFINIPATH_RHF_RCVTYPE_MASK 0x7
 #define INFINIPATH_RHF_RCVTYPE_SHIFT 11
-#define INFINIPATH_RHF_EGRINDEX_MASK 0x7FF
+#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
 #define INFINIPATH_RHF_EGRINDEX_SHIFT 16
+#define INFINIPATH_RHF_SEQ_MASK 0xF
+#define INFINIPATH_RHF_SEQ_SHIFT 0
+#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
+#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
 #define INFINIPATH_RHF_H_ICRCERR   0x80000000
 #define INFINIPATH_RHF_H_VCRCERR   0x40000000
 #define INFINIPATH_RHF_H_PARITYERR 0x20000000
@@ -673,6 +692,8 @@
 #define INFINIPATH_RHF_H_TIDERR    0x02000000
 #define INFINIPATH_RHF_H_MKERR     0x01000000
 #define INFINIPATH_RHF_H_IBERR     0x00800000
+#define INFINIPATH_RHF_H_ERR_MASK  0xFF800000
+#define INFINIPATH_RHF_L_USE_EGR   0x80000000
 #define INFINIPATH_RHF_L_SWA       0x00008000
 #define INFINIPATH_RHF_L_SWB       0x00004000
 
@@ -696,6 +717,7 @@
 /* SendPIO per-buffer control */
 #define INFINIPATH_SP_TEST    0x40
 #define INFINIPATH_SP_TESTEBP 0x20
+#define INFINIPATH_SP_TRIGGER_SHIFT  15
 
 /* SendPIOAvail bits */
 #define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
@@ -762,6 +784,7 @@
 #define IPATH_MSN_MASK 0xFFFFFF
 #define IPATH_QPN_MASK 0xFFFFFF
 #define IPATH_MULTICAST_LID_BASE 0xC000
+#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
 #define IPATH_MULTICAST_QPN 0xFFFFFF
 
 /* Receive Header Queue: receive type (from infinipath) */
@@ -781,7 +804,7 @@
  */
 static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
 {
-	return __le32_to_cpu(rbuf[1]);
+	return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
 }
 
 static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
@@ -802,6 +825,23 @@
 	    & INFINIPATH_RHF_EGRINDEX_MASK;
 }
 
+static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
+{
+	return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
+		& INFINIPATH_RHF_SEQ_MASK;
+}
+
+static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
+{
+	return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
+		& INFINIPATH_RHF_HDRQ_OFFSET_MASK;
+}
+
+static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
+{
+	return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
+}
+
 static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
 {
 	return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index d6f6953..65926cd 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -66,6 +66,7 @@
 #define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
 #define __IPATH_IPATHPD     0x80000	/* Ethernet (IPATH) packet dump */
 #define __IPATH_IPATHTABLE  0x100000	/* Ethernet (IPATH) table dump */
+#define __IPATH_LINKVERBDBG 0x200000	/* very verbose linkchange debug */
 
 #else				/* _IPATH_DEBUGGING */
 
@@ -89,6 +90,7 @@
 #define __IPATH_IPATHERR  0x0	/* Ethernet (IPATH) errors on   */
 #define __IPATH_IPATHPD   0x0	/* Ethernet (IPATH) packet dump on   */
 #define __IPATH_IPATHTABLE 0x0	/* Ethernet (IPATH) packet dump on   */
+#define __IPATH_LINKVERBDBG 0x0	/* very verbose linkchange debug */
 
 #endif				/* _IPATH_DEBUGGING */
 
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 4137c77..6d49d2f 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -330,13 +330,19 @@
 	struct ipath_devdata *dd;
 	ssize_t ret = 0;
 	u64 val;
+	u32 l_state, lt_state; /* LinkState, LinkTrainingState */
 
-	if (count != sizeof(dp)) {
+	if (count < sizeof(odp)) {
 		ret = -EINVAL;
 		goto bail;
 	}
 
-	if (copy_from_user(&dp, data, sizeof(dp))) {
+	if (count == sizeof(dp)) {
+		if (copy_from_user(&dp, data, sizeof(dp))) {
+			ret = -EFAULT;
+			goto bail;
+		}
+	} else if (copy_from_user(&odp, data, sizeof(odp))) {
 		ret = -EFAULT;
 		goto bail;
 	}
@@ -396,10 +402,17 @@
 		ret = -ENODEV;
 		goto bail;
 	}
-	/* Check link state, but not if we have custom PBC */
-	val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
-	if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT &&
-		val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) {
+	/*
+	 * Want to skip check for l_state if using custom PBC,
+	 * because we might be trying to force an SM packet out.
+	 * first-cut, skip _all_ state checking in that case.
+	 */
+	val = ipath_ib_state(dd, dd->ipath_lastibcstat);
+	lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
+	l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
+	if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
+	    (val != dd->ib_init && val != dd->ib_arm &&
+	    val != dd->ib_active))) {
 		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
 			   dd->ipath_unit, (unsigned long long) val);
 		ret = -EINVAL;
@@ -431,15 +444,17 @@
 		goto bail;
 	}
 
-	piobuf = ipath_getpiobuf(dd, &pbufn);
+	plen >>= 2;		/* in dwords */
+
+	piobuf = ipath_getpiobuf(dd, plen, &pbufn);
 	if (!piobuf) {
 		ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
 			   dd->ipath_unit);
 		ret = -EBUSY;
 		goto bail;
 	}
-
-	plen >>= 2;		/* in dwords */
+	/* disarm it just to be extra sure */
+	ipath_disarm_piobufs(dd, pbufn, 1);
 
 	if (ipath_debug & __IPATH_PKTDBG)
 		ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ca4d0ac..e0a64f0 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -41,7 +41,6 @@
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
-#include "ipath_common.h"
 
 static void ipath_update_pio_bufs(struct ipath_devdata *);
 
@@ -73,10 +72,27 @@
 MODULE_PARM_DESC(debug, "mask for debug prints");
 EXPORT_SYMBOL_GPL(ipath_debug);
 
+unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
+module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
+MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
+
+static unsigned ipath_hol_timeout_ms = 13000;
+module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
+MODULE_PARM_DESC(hol_timeout_ms,
+	"duration of user app suspension after link failure");
+
+unsigned ipath_linkrecovery = 1;
+module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
+
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("QLogic <support@pathscale.com>");
+MODULE_AUTHOR("QLogic <support@qlogic.com>");
 MODULE_DESCRIPTION("QLogic InfiniPath driver");
 
+/*
+ * Table to translate the LINKTRAININGSTATE portion of
+ * IBCStatus to a human-readable form.
+ */
 const char *ipath_ibcstatus_str[] = {
 	"Disabled",
 	"LinkUp",
@@ -91,9 +107,20 @@
 	"CfgWaitRmt",
 	"CfgIdle",
 	"RecovRetrain",
-	"LState0xD",		/* unused */
+	"CfgTxRevLane",		/* unused before IBA7220 */
 	"RecovWaitRmt",
 	"RecovIdle",
+	/* below were added for IBA7220 */
+	"CfgEnhanced",
+	"CfgTest",
+	"CfgWaitRmtTest",
+	"CfgWaitCfgEnhanced",
+	"SendTS_T",
+	"SendTstIdles",
+	"RcvTS_T",
+	"SendTst_TS1s",
+	"LTState18", "LTState19", "LTState1A", "LTState1B",
+	"LTState1C", "LTState1D", "LTState1E", "LTState1F"
 };
 
 static void __devexit ipath_remove_one(struct pci_dev *);
@@ -102,8 +129,10 @@
 
 /* Only needed for registration, nothing else needs this info */
 #define PCI_VENDOR_ID_PATHSCALE 0x1fc1
+#define PCI_VENDOR_ID_QLOGIC 0x1077
 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd
 #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
+#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220
 
 /* Number of seconds before our card status check...  */
 #define STATUS_TIMEOUT 60
@@ -111,6 +140,7 @@
 static const struct pci_device_id ipath_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) },
 	{ 0, }
 };
 
@@ -126,19 +156,6 @@
 	},
 };
 
-static void ipath_check_status(struct work_struct *work)
-{
-	struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
-						status_work.work);
-
-	/*
-	 * If we don't have any interrupts, let the user know and
-	 * don't bother checking again.
-	 */
-	if (dd->ipath_int_counter == 0)
-		dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
-}
-
 static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
 			     u32 *bar0, u32 *bar1)
 {
@@ -206,8 +223,6 @@
 	dd->pcidev = pdev;
 	pci_set_drvdata(pdev, dd);
 
-	INIT_DELAYED_WORK(&dd->status_work, ipath_check_status);
-
 	list_add(&dd->ipath_list, &ipath_dev_list);
 
 bail_unlock:
@@ -234,12 +249,12 @@
 	return dd;
 }
 
-int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
+int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
 {
 	int nunits, npresent, nup;
 	struct ipath_devdata *dd;
 	unsigned long flags;
-	u32 maxports;
+	int maxports;
 
 	nunits = npresent = nup = maxports = 0;
 
@@ -304,7 +319,7 @@
 	u32 *addr;
 	u64 msecs, emsecs;
 
-	piobuf = ipath_getpiobuf(dd, &pbnum);
+	piobuf = ipath_getpiobuf(dd, 0, &pbnum);
 	if (!piobuf) {
 		dev_info(&dd->pcidev->dev,
 			"No PIObufs for checking perf, skipping\n");
@@ -336,7 +351,14 @@
 
 	ipath_disable_armlaunch(dd);
 
-	writeq(0, piobuf); /* length 0, no dwords actually sent */
+	/*
+	 * length 0, no dwords actually sent, and mark as VL15
+	 * on chips where that may matter (due to IB flowcontrol)
+	 */
+	if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
+		writeq(1UL << 63, piobuf);
+	else
+		writeq(0, piobuf);
 	ipath_flush_wc();
 
 	/*
@@ -377,6 +399,7 @@
 	struct ipath_devdata *dd;
 	unsigned long long addr;
 	u32 bar0 = 0, bar1 = 0;
+	u8 rev;
 
 	dd = ipath_alloc_devdata(pdev);
 	if (IS_ERR(dd)) {
@@ -408,7 +431,7 @@
 	}
 	addr = pci_resource_start(pdev, 0);
 	len = pci_resource_len(pdev, 0);
-	ipath_cdbg(VERBOSE, "regbase (0) %llx len %d pdev->irq %d, vend %x/%x "
+	ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
 		   "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
 		   ent->device, ent->driver_data);
 
@@ -512,6 +535,13 @@
 			      "CONFIG_PCI_MSI is not enabled\n", ent->device);
 		return -ENODEV;
 #endif
+	case PCI_DEVICE_ID_INFINIPATH_7220:
+#ifndef CONFIG_PCI_MSI
+		ipath_dbg("CONFIG_PCI_MSI is not enabled, "
+			  "using IntX for unit %u\n", dd->ipath_unit);
+#endif
+		ipath_init_iba7220_funcs(dd);
+		break;
 	default:
 		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
 			      "failing\n", ent->device);
@@ -533,7 +563,13 @@
 		goto bail_regions;
 	}
 
-	dd->ipath_pcirev = pdev->revision;
+	ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
+	if (ret) {
+		ipath_dev_err(dd, "Failed to read PCI revision ID unit "
+			      "%u: err %d\n", dd->ipath_unit, -ret);
+		goto bail_regions;	/* shouldn't ever happen */
+	}
+	dd->ipath_pcirev = rev;
 
 #if defined(__powerpc__)
 	/* There isn't a generic way to specify writethrough mappings */
@@ -556,14 +592,6 @@
 	ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
 		   addr, dd->ipath_kregbase);
 
-	/*
-	 * clear ipath_flags here instead of in ipath_init_chip as it is set
-	 * by ipath_setup_htconfig.
-	 */
-	dd->ipath_flags = 0;
-	dd->ipath_lli_counter = 0;
-	dd->ipath_lli_errors = 0;
-
 	if (dd->ipath_f_bus(dd, pdev))
 		ipath_dev_err(dd, "Failed to setup config space; "
 			      "continuing anyway\n");
@@ -608,13 +636,11 @@
 	ipath_diag_add(dd);
 	ipath_register_ib_device(dd);
 
-	/* Check that card status in STATUS_TIMEOUT seconds. */
-	schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT);
-
 	goto bail;
 
 bail_irqsetup:
-	if (pdev->irq) free_irq(pdev->irq, dd);
+	if (pdev->irq)
+		free_irq(pdev->irq, dd);
 
 bail_iounmap:
 	iounmap((volatile void __iomem *) dd->ipath_kregbase);
@@ -654,6 +680,10 @@
 		ipath_disable_wc(dd);
 	}
 
+	if (dd->ipath_spectriggerhit)
+		dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
+			 dd->ipath_spectriggerhit);
+
 	if (dd->ipath_pioavailregs_dma) {
 		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
 				  (void *) dd->ipath_pioavailregs_dma,
@@ -706,6 +736,8 @@
 		tmpp = dd->ipath_pageshadow;
 		dd->ipath_pageshadow = NULL;
 		vfree(tmpp);
+
+		dd->ipath_egrtidbase = NULL;
 	}
 
 	/*
@@ -738,7 +770,6 @@
 	 */
 	ipath_shutdown_device(dd);
 
-	cancel_delayed_work(&dd->status_work);
 	flush_scheduled_work();
 
 	if (dd->verbs_dev)
@@ -823,20 +854,8 @@
 		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
 	}
-
-	/*
-	 * Disable PIOAVAILUPD, then re-enable, reading scratch in
-	 * between.  This seems to avoid a chip timing race that causes
-	 * pioavail updates to memory to stop.  We xor as we don't
-	 * know the state of the bit when we're called.
-	 */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		dd->ipath_sendctrl ^ INFINIPATH_S_PIOBUFAVAILUPD);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	/* on some older chips, update may not happen after cancel */
+	ipath_force_pio_avail_update(dd);
 }
 
 /**
@@ -873,18 +892,52 @@
 			   (unsigned long long) ipath_read_kreg64(
 				   dd, dd->ipath_kregs->kr_ibcctrl),
 			   (unsigned long long) val,
-			   ipath_ibcstatus_str[val & 0xf]);
+			   ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
 	}
 	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
 }
 
+static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
+	char *buf, size_t blen)
+{
+	static const struct {
+		ipath_err_t err;
+		const char *msg;
+	} errs[] = {
+		{ INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
+		{ INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
+		{ INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
+		{ INFINIPATH_E_SDMABASE, "SDmaBase" },
+		{ INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
+		{ INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
+		{ INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
+		{ INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
+		{ INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
+		{ INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
+		{ INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
+		{ INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
+	};
+	int i;
+	int expected;
+	size_t bidx = 0;
+
+	for (i = 0; i < ARRAY_SIZE(errs); i++) {
+		expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
+			test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+		if ((err & errs[i].err) && !expected)
+			bidx += snprintf(buf + bidx, blen - bidx,
+					 "%s ", errs[i].msg);
+	}
+}
+
 /*
  * Decode the error status into strings, deciding whether to always
  * print * it or not depending on "normal packet errors" vs everything
  * else.   Return 1 if "real" errors, otherwise 0 if only packet
  * errors, so caller can decide what to print with the string.
  */
-int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
+	ipath_err_t err)
 {
 	int iserr = 1;
 	*buf = '\0';
@@ -922,6 +975,8 @@
 		strlcat(buf, "rbadversion ", blen);
 	if (err & INFINIPATH_E_RHDR)
 		strlcat(buf, "rhdr ", blen);
+	if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
+		strlcat(buf, "sendspecialtrigger ", blen);
 	if (err & INFINIPATH_E_RLONGPKTLEN)
 		strlcat(buf, "rlongpktlen ", blen);
 	if (err & INFINIPATH_E_RMAXPKTLEN)
@@ -964,6 +1019,10 @@
 		strlcat(buf, "hardware ", blen);
 	if (err & INFINIPATH_E_RESET)
 		strlcat(buf, "reset ", blen);
+	if (err & INFINIPATH_E_SDMAERRS)
+		decode_sdma_errs(dd, err, buf, blen);
+	if (err & INFINIPATH_E_INVALIDEEPCMD)
+		strlcat(buf, "invalideepromcmd ", blen);
 done:
 	return iserr;
 }
@@ -1076,18 +1135,17 @@
 			     u32 eflags,
 			     u32 l,
 			     u32 etail,
-			     u64 *rc)
+			     __le32 *rhf_addr,
+			     struct ipath_message_header *hdr)
 {
 	char emsg[128];
-	struct ipath_message_header *hdr;
 
 	get_rhf_errstring(eflags, emsg, sizeof emsg);
-	hdr = (struct ipath_message_header *)&rc[1];
 	ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
 		   "tlen=%x opcode=%x egridx=%x: %s\n",
 		   eflags, l,
-		   ipath_hdrget_rcv_type((__le32 *) rc),
-		   ipath_hdrget_length_in_bytes((__le32 *) rc),
+		   ipath_hdrget_rcv_type(rhf_addr),
+		   ipath_hdrget_length_in_bytes(rhf_addr),
 		   be32_to_cpu(hdr->bth[0]) >> 24,
 		   etail, emsg);
 
@@ -1112,55 +1170,52 @@
  */
 void ipath_kreceive(struct ipath_portdata *pd)
 {
-	u64 *rc;
 	struct ipath_devdata *dd = pd->port_dd;
+	__le32 *rhf_addr;
 	void *ebuf;
 	const u32 rsize = dd->ipath_rcvhdrentsize;	/* words */
 	const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize;	/* words */
 	u32 etail = -1, l, hdrqtail;
 	struct ipath_message_header *hdr;
-	u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0;
+	u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
 	static u64 totcalls;	/* stats, may eventually remove */
-
-	if (!dd->ipath_hdrqtailptr) {
-		ipath_dev_err(dd,
-			      "hdrqtailptr not set, can't do receives\n");
-		goto bail;
-	}
+	int last;
 
 	l = pd->port_head;
-	hdrqtail = ipath_get_rcvhdrtail(pd);
-	if (l == hdrqtail)
-		goto bail;
+	rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
+	if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
+		u32 seq = ipath_hdrget_seq(rhf_addr);
+
+		if (seq != pd->port_seq_cnt)
+			goto bail;
+		hdrqtail = 0;
+	} else {
+		hdrqtail = ipath_get_rcvhdrtail(pd);
+		if (l == hdrqtail)
+			goto bail;
+		smp_rmb();
+	}
 
 reloop:
-	for (i = 0; l != hdrqtail; i++) {
-		u32 qp;
-		u8 *bthbytes;
-
-		rc = (u64 *) (pd->port_rcvhdrq + (l << 2));
-		hdr = (struct ipath_message_header *)&rc[1];
-		/*
-		 * could make a network order version of IPATH_KD_QP, and
-		 * do the obvious shift before masking to speed this up.
-		 */
-		qp = ntohl(hdr->bth[1]) & 0xffffff;
-		bthbytes = (u8 *) hdr->bth;
-
-		eflags = ipath_hdrget_err_flags((__le32 *) rc);
-		etype = ipath_hdrget_rcv_type((__le32 *) rc);
+	for (last = 0, i = 1; !last; i++) {
+		hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
+		eflags = ipath_hdrget_err_flags(rhf_addr);
+		etype = ipath_hdrget_rcv_type(rhf_addr);
 		/* total length */
-		tlen = ipath_hdrget_length_in_bytes((__le32 *) rc);
+		tlen = ipath_hdrget_length_in_bytes(rhf_addr);
 		ebuf = NULL;
-		if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
+		if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
+		    ipath_hdrget_use_egr_buf(rhf_addr) :
+		    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
 			/*
-			 * it turns out that the chips uses an eager buffer
+			 * It turns out that the chip uses an eager buffer
 			 * for all non-expected packets, whether it "needs"
 			 * one or not.  So always get the index, but don't
 			 * set ebuf (so we try to copy data) unless the
 			 * length requires it.
 			 */
-			etail = ipath_hdrget_index((__le32 *) rc);
+			etail = ipath_hdrget_index(rhf_addr);
+			updegr = 1;
 			if (tlen > sizeof(*hdr) ||
 			    etype == RCVHQ_RCV_TYPE_NON_KD)
 				ebuf = ipath_get_egrbuf(dd, etail);
@@ -1171,75 +1226,91 @@
 		 * packets; only ipathhdrerr should be set.
 		 */
 
-		if (etype != RCVHQ_RCV_TYPE_NON_KD && etype !=
-		    RCVHQ_RCV_TYPE_ERROR && ipath_hdrget_ipath_ver(
-			    hdr->iph.ver_port_tid_offset) !=
-		    IPS_PROTO_VERSION) {
+		if (etype != RCVHQ_RCV_TYPE_NON_KD &&
+		    etype != RCVHQ_RCV_TYPE_ERROR &&
+		    ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
+		    IPS_PROTO_VERSION)
 			ipath_cdbg(PKT, "Bad InfiniPath protocol version "
 				   "%x\n", etype);
-		}
 
 		if (unlikely(eflags))
-			ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
+			ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
 		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-			ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
+			ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
 			if (dd->ipath_lli_counter)
 				dd->ipath_lli_counter--;
+		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
+			u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
+			u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
 			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
 				   "qp=%x), len %x; ignored\n",
-				   etype, bthbytes[0], qp, tlen);
+				   etype, opcode, qp, tlen);
 		}
-		else if (etype == RCVHQ_RCV_TYPE_EAGER)
-			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
-				   "qp=%x), len %x; ignored\n",
-				   etype, bthbytes[0], qp, tlen);
 		else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
 			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
-				  be32_to_cpu(hdr->bth[0]) & 0xff);
+				  be32_to_cpu(hdr->bth[0]) >> 24);
 		else {
 			/*
 			 * error packet, type of error unknown.
 			 * Probably type 3, but we don't know, so don't
 			 * even try to print the opcode, etc.
+			 * Usually caused by a "bad packet", that has no
+			 * BTH, when the LRH says it should.
 			 */
-			ipath_dbg("Error Pkt, but no eflags! egrbuf %x, "
-				  "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; "
-				  "hdr %llx %llx %llx %llx %llx\n",
-				  etail, tlen, (unsigned long) rc, l,
-				  (unsigned long long) rc[0],
-				  (unsigned long long) rc[1],
-				  (unsigned long long) rc[2],
-				  (unsigned long long) rc[3],
-				  (unsigned long long) rc[4],
-				  (unsigned long long) rc[5]);
+			ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
+				  " %x, len %x hdrq+%x rhf: %Lx\n",
+				  etail, tlen, l,
+				  le64_to_cpu(*(__le64 *) rhf_addr));
+			if (ipath_debug & __IPATH_ERRPKTDBG) {
+				u32 j, *d, dw = rsize-2;
+				if (rsize > (tlen>>2))
+					dw = tlen>>2;
+				d = (u32 *)hdr;
+				printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
+					dw);
+				for (j = 0; j < dw; j++)
+					printk(KERN_DEBUG "%8x%s", d[j],
+						(j%8) == 7 ? "\n" : " ");
+				printk(KERN_DEBUG ".\n");
+			}
 		}
 		l += rsize;
 		if (l >= maxcnt)
 			l = 0;
-		if (etype != RCVHQ_RCV_TYPE_EXPECTED)
-		    updegr = 1;
+		rhf_addr = (__le32 *) pd->port_rcvhdrq +
+			l + dd->ipath_rhf_offset;
+		if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
+			u32 seq = ipath_hdrget_seq(rhf_addr);
+
+			if (++pd->port_seq_cnt > 13)
+				pd->port_seq_cnt = 1;
+			if (seq != pd->port_seq_cnt)
+				last = 1;
+		} else if (l == hdrqtail)
+			last = 1;
 		/*
 		 * update head regs on last packet, and every 16 packets.
 		 * Reduce bus traffic, while still trying to prevent
 		 * rcvhdrq overflows, for when the queue is nearly full
 		 */
-		if (l == hdrqtail || (i && !(i&0xf))) {
-			u64 lval;
-			if (l == hdrqtail)
-				/* request IBA6120 interrupt only on last */
-				lval = dd->ipath_rhdrhead_intr_off | l;
-			else
-				lval = l;
-			(void)ipath_write_ureg(dd, ur_rcvhdrhead, lval, 0);
+		if (last || !(i & 0xf)) {
+			u64 lval = l;
+
+			/* request IBA6120 and 7220 interrupt only on last */
+			if (last)
+				lval |= dd->ipath_rhdrhead_intr_off;
+			ipath_write_ureg(dd, ur_rcvhdrhead, lval,
+				pd->port_port);
 			if (updegr) {
-				(void)ipath_write_ureg(dd, ur_rcvegrindexhead,
-						       etail, 0);
+				ipath_write_ureg(dd, ur_rcvegrindexhead,
+						 etail, pd->port_port);
 				updegr = 0;
 			}
 		}
 	}
 
-	if (!dd->ipath_rhdrhead_intr_off && !reloop) {
+	if (!dd->ipath_rhdrhead_intr_off && !reloop &&
+	    !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
 		/* IBA6110 workaround; we can have a race clearing chip
 		 * interrupt with another interrupt about to be delivered,
 		 * and can clear it before it is delivered on the GPIO
@@ -1301,7 +1372,6 @@
 	 * happens when all buffers are in use, so only cpu overhead, not
 	 * latency or bandwidth is affected.
 	 */
-#define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL
 	if (!dd->ipath_pioavailregs_dma) {
 		ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
 		return;
@@ -1346,7 +1416,7 @@
 			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
 		else
 			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
-		pchg = _IPATH_ALL_CHECKBITS &
+		pchg = dd->ipath_pioavailkernel[i] &
 			~(dd->ipath_pioavailshadow[i] ^ piov);
 		pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
 		if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
@@ -1397,27 +1467,63 @@
 	return ret;
 }
 
-/**
- * ipath_getpiobuf - find an available pio buffer
- * @dd: the infinipath device
- * @pbufnum: the buffer number is placed here
+/*
+ * debugging code and stats updates if no pio buffers available.
+ */
+static noinline void no_pio_bufs(struct ipath_devdata *dd)
+{
+	unsigned long *shadow = dd->ipath_pioavailshadow;
+	__le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
+
+	dd->ipath_upd_pio_shadow = 1;
+
+	/*
+	 * not atomic, but if we lose a stat count in a while, that's OK
+	 */
+	ipath_stats.sps_nopiobufs++;
+	if (!(++dd->ipath_consec_nopiobuf % 100000)) {
+		ipath_dbg("%u pio sends with no bufavail; dmacopy: "
+			"%llx %llx %llx %llx; shadow:  %lx %lx %lx %lx\n",
+			dd->ipath_consec_nopiobuf,
+			(unsigned long long) le64_to_cpu(dma[0]),
+			(unsigned long long) le64_to_cpu(dma[1]),
+			(unsigned long long) le64_to_cpu(dma[2]),
+			(unsigned long long) le64_to_cpu(dma[3]),
+			shadow[0], shadow[1], shadow[2], shadow[3]);
+		/*
+		 * 4 buffers per byte, 4 registers above, cover rest
+		 * below
+		 */
+		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
+		    (sizeof(shadow[0]) * 4 * 4))
+			ipath_dbg("2nd group: dmacopy: %llx %llx "
+				  "%llx %llx; shadow: %lx %lx %lx %lx\n",
+				  (unsigned long long)le64_to_cpu(dma[4]),
+				  (unsigned long long)le64_to_cpu(dma[5]),
+				  (unsigned long long)le64_to_cpu(dma[6]),
+				  (unsigned long long)le64_to_cpu(dma[7]),
+				  shadow[4], shadow[5], shadow[6],
+				  shadow[7]);
+	}
+}
+
+/*
+ * common code for normal driver pio buffer allocation, and reserved
+ * allocation.
  *
  * do appropriate marking as busy, etc.
  * returns buffer number if one found (>=0), negative number is error.
- * Used by ipath_layer_send
  */
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
+static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
+	u32 *pbufnum, u32 first, u32 last, u32 firsti)
 {
-	int i, j, starti, updated = 0;
-	unsigned piobcnt, iter;
+	int i, j, updated = 0;
+	unsigned piobcnt;
 	unsigned long flags;
 	unsigned long *shadow = dd->ipath_pioavailshadow;
 	u32 __iomem *buf;
 
-	piobcnt = (unsigned)(dd->ipath_piobcnt2k
-			     + dd->ipath_piobcnt4k);
-	starti = dd->ipath_lastport_piobuf;
-	iter = piobcnt - starti;
+	piobcnt = last - first;
 	if (dd->ipath_upd_pio_shadow) {
 		/*
 		 * Minor optimization.  If we had no buffers on last call,
@@ -1425,12 +1531,10 @@
 		 * if no buffers were updated, to be paranoid
 		 */
 		ipath_update_pio_bufs(dd);
-		/* we scanned here, don't do it at end of scan */
-		updated = 1;
-		i = starti;
+		updated++;
+		i = first;
 	} else
-		i = dd->ipath_lastpioindex;
-
+		i = firsti;
 rescan:
 	/*
 	 * while test_and_set_bit() is atomic, we do that and then the
@@ -1438,104 +1542,141 @@
 	 * of the remaining armlaunch errors.
 	 */
 	spin_lock_irqsave(&ipath_pioavail_lock, flags);
-	for (j = 0; j < iter; j++, i++) {
-		if (i >= piobcnt)
-			i = starti;
-		/*
-		 * To avoid bus lock overhead, we first find a candidate
-		 * buffer, then do the test and set, and continue if that
-		 * fails.
-		 */
-		if (test_bit((2 * i) + 1, shadow) ||
-		    test_and_set_bit((2 * i) + 1, shadow))
+	for (j = 0; j < piobcnt; j++, i++) {
+		if (i >= last)
+			i = first;
+		if (__test_and_set_bit((2 * i) + 1, shadow))
 			continue;
 		/* flip generation bit */
-		change_bit(2 * i, shadow);
+		__change_bit(2 * i, shadow);
 		break;
 	}
 	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
 
-	if (j == iter) {
-		volatile __le64 *dma = dd->ipath_pioavailregs_dma;
-
-		/*
-		 * first time through; shadow exhausted, but may be real
-		 * buffers available, so go see; if any updated, rescan
-		 * (once)
-		 */
+	if (j == piobcnt) {
 		if (!updated) {
+			/*
+			 * first time through; shadow exhausted, but may be
+			 * buffers available, try an update and then rescan.
+			 */
 			ipath_update_pio_bufs(dd);
-			updated = 1;
-			i = starti;
+			updated++;
+			i = first;
+			goto rescan;
+		} else if (updated == 1 && piobcnt <=
+			((dd->ipath_sendctrl
+			>> INFINIPATH_S_UPDTHRESH_SHIFT) &
+			INFINIPATH_S_UPDTHRESH_MASK)) {
+			/*
+			 * for chips supporting and using the update
+			 * threshold we need to force an update of the
+			 * in-memory copy if the count is less than the
+			 * thershold, then check one more time.
+			 */
+			ipath_force_pio_avail_update(dd);
+			ipath_update_pio_bufs(dd);
+			updated++;
+			i = first;
 			goto rescan;
 		}
-		dd->ipath_upd_pio_shadow = 1;
-		/*
-		 * not atomic, but if we lose one once in a while, that's OK
-		 */
-		ipath_stats.sps_nopiobufs++;
-		if (!(++dd->ipath_consec_nopiobuf % 100000)) {
-			ipath_dbg(
-				"%u pio sends with no bufavail; dmacopy: "
-				"%llx %llx %llx %llx; shadow:  "
-				"%lx %lx %lx %lx\n",
-				dd->ipath_consec_nopiobuf,
-				(unsigned long long) le64_to_cpu(dma[0]),
-				(unsigned long long) le64_to_cpu(dma[1]),
-				(unsigned long long) le64_to_cpu(dma[2]),
-				(unsigned long long) le64_to_cpu(dma[3]),
-				shadow[0], shadow[1], shadow[2],
-				shadow[3]);
-			/*
-			 * 4 buffers per byte, 4 registers above, cover rest
-			 * below
-			 */
-			if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
-			    (sizeof(shadow[0]) * 4 * 4))
-				ipath_dbg("2nd group: dmacopy: %llx %llx "
-					  "%llx %llx; shadow: %lx %lx "
-					  "%lx %lx\n",
-					  (unsigned long long)
-					  le64_to_cpu(dma[4]),
-					  (unsigned long long)
-					  le64_to_cpu(dma[5]),
-					  (unsigned long long)
-					  le64_to_cpu(dma[6]),
-					  (unsigned long long)
-					  le64_to_cpu(dma[7]),
-					  shadow[4], shadow[5],
-					  shadow[6], shadow[7]);
-		}
+
+		no_pio_bufs(dd);
 		buf = NULL;
-		goto bail;
+	} else {
+		if (i < dd->ipath_piobcnt2k)
+			buf = (u32 __iomem *) (dd->ipath_pio2kbase +
+					       i * dd->ipath_palign);
+		else
+			buf = (u32 __iomem *)
+				(dd->ipath_pio4kbase +
+				 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+		if (pbufnum)
+			*pbufnum = i;
 	}
 
-	/*
-	 * set next starting place.  Since it's just an optimization,
-	 * it doesn't matter who wins on this, so no locking
-	 */
-	dd->ipath_lastpioindex = i + 1;
-	if (dd->ipath_upd_pio_shadow)
-		dd->ipath_upd_pio_shadow = 0;
-	if (dd->ipath_consec_nopiobuf)
-		dd->ipath_consec_nopiobuf = 0;
-	if (i < dd->ipath_piobcnt2k)
-		buf = (u32 __iomem *) (dd->ipath_pio2kbase +
-				       i * dd->ipath_palign);
-	else
-		buf = (u32 __iomem *)
-			(dd->ipath_pio4kbase +
-			 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
-	ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
-		   i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf);
-	if (pbufnum)
-		*pbufnum = i;
-
-bail:
 	return buf;
 }
 
 /**
+ * ipath_getpiobuf - find an available pio buffer
+ * @dd: the infinipath device
+ * @plen: the size of the PIO buffer needed in 32-bit words
+ * @pbufnum: the buffer number is placed here
+ */
+u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
+{
+	u32 __iomem *buf;
+	u32 pnum, nbufs;
+	u32 first, lasti;
+
+	if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
+		first = dd->ipath_piobcnt2k;
+		lasti = dd->ipath_lastpioindexl;
+	} else {
+		first = 0;
+		lasti = dd->ipath_lastpioindex;
+	}
+	nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+	buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
+
+	if (buf) {
+		/*
+		 * Set next starting place.  It's just an optimization,
+		 * it doesn't matter who wins on this, so no locking
+		 */
+		if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
+			dd->ipath_lastpioindexl = pnum + 1;
+		else
+			dd->ipath_lastpioindex = pnum + 1;
+		if (dd->ipath_upd_pio_shadow)
+			dd->ipath_upd_pio_shadow = 0;
+		if (dd->ipath_consec_nopiobuf)
+			dd->ipath_consec_nopiobuf = 0;
+		ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
+			   pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
+		if (pbufnum)
+			*pbufnum = pnum;
+
+	}
+	return buf;
+}
+
+/**
+ * ipath_chg_pioavailkernel - change which send buffers are available for kernel
+ * @dd: the infinipath device
+ * @start: the starting send buffer number
+ * @len: the number of send buffers
+ * @avail: true if the buffers are available for kernel use, false otherwise
+ */
+void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
+			      unsigned len, int avail)
+{
+	unsigned long flags;
+	unsigned end;
+
+	/* There are two bits per send buffer (busy and generation) */
+	start *= 2;
+	len *= 2;
+	end = start + len;
+
+	/* Set or clear the generation bits. */
+	spin_lock_irqsave(&ipath_pioavail_lock, flags);
+	while (start < end) {
+		if (avail) {
+			__clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
+				dd->ipath_pioavailshadow);
+			__set_bit(start, dd->ipath_pioavailkernel);
+		} else {
+			__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
+				dd->ipath_pioavailshadow);
+			__clear_bit(start, dd->ipath_pioavailkernel);
+		}
+		start += 2;
+	}
+	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
+/**
  * ipath_create_rcvhdrq - create a receive header queue
  * @dd: the infinipath device
  * @pd: the port data
@@ -1566,19 +1707,27 @@
 			ret = -ENOMEM;
 			goto bail;
 		}
-		pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
-			&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
-		if (!pd->port_rcvhdrtail_kvaddr) {
-			ipath_dev_err(dd, "attempt to allocate 1 page "
-				      "for port %u rcvhdrqtailaddr failed\n",
-				      pd->port_port);
-			ret = -ENOMEM;
-			dma_free_coherent(&dd->pcidev->dev, amt,
-					  pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
-			pd->port_rcvhdrq = NULL;
-			goto bail;
+
+		if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
+			pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
+				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+				GFP_KERNEL);
+			if (!pd->port_rcvhdrtail_kvaddr) {
+				ipath_dev_err(dd, "attempt to allocate 1 page "
+					"for port %u rcvhdrqtailaddr "
+					"failed\n", pd->port_port);
+				ret = -ENOMEM;
+				dma_free_coherent(&dd->pcidev->dev, amt,
+					pd->port_rcvhdrq,
+					pd->port_rcvhdrq_phys);
+				pd->port_rcvhdrq = NULL;
+				goto bail;
+			}
+			pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
+			ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
+				   "physical\n", pd->port_port,
+				   (unsigned long long) phys_hdrqtail);
 		}
-		pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
 
 		pd->port_rcvhdrq_size = amt;
 
@@ -1588,10 +1737,6 @@
 			   (unsigned long) pd->port_rcvhdrq_phys,
 			   (unsigned long) pd->port_rcvhdrq_size,
 			   pd->port_port);
-
-		ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
-			   pd->port_port,
-			   (unsigned long long) phys_hdrqtail);
 	}
 	else
 		ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
@@ -1615,7 +1760,6 @@
 	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
 			      pd->port_port, pd->port_rcvhdrq_phys);
 
-	ret = 0;
 bail:
 	return ret;
 }
@@ -1632,52 +1776,149 @@
  */
 void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
 {
-	ipath_dbg("Cancelling all in-progress send buffers\n");
-	dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */
-	/*
-	 * the abort bit is auto-clearing.  We read scratch to be sure
-	 * that cancels and the abort have taken effect in the chip.
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		INFINIPATH_S_ABORT);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	ipath_disarm_piobufs(dd, 0,
-		(unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
-	if (restore_sendctrl) /* else done by caller later */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
+	unsigned long flags;
 
-	/* and again, be sure all have hit the chip */
+	if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
+		ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
+		goto bail;
+	}
+	/*
+	 * If we have SDMA, and it's not disabled, we have to kick off the
+	 * abort state machine, provided we aren't already aborting.
+	 * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
+	 * we skip the rest of this routine. It is already "in progress"
+	 */
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
+		int skip_cancel;
+		u64 *statp = &dd->ipath_sdma_status;
+
+		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+		skip_cancel =
+			!test_bit(IPATH_SDMA_DISABLED, statp) &&
+			test_and_set_bit(IPATH_SDMA_ABORTING, statp);
+		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+		if (skip_cancel)
+			goto bail;
+	}
+
+	ipath_dbg("Cancelling all in-progress send buffers\n");
+
+	/* skip armlaunch errs for a while */
+	dd->ipath_lastcancel = jiffies + HZ / 2;
+
+	/*
+	 * The abort bit is auto-clearing.  We also don't want pioavail
+	 * update happening during this, and we don't want any other
+	 * sends going out, so turn those off for the duration.  We read
+	 * the scratch register to be sure that cancels and the abort
+	 * have taken effect in the chip.  Otherwise two parts are same
+	 * as ipath_force_pio_avail_update()
+	 */
+	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+	dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
+		| INFINIPATH_S_PIOENABLE);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+		dd->ipath_sendctrl | INFINIPATH_S_ABORT);
 	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+	/* disarm all send buffers */
+	ipath_disarm_piobufs(dd, 0,
+		dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+
+	if (restore_sendctrl) {
+		/* else done by caller later if needed */
+		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+		dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
+			INFINIPATH_S_PIOENABLE;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			dd->ipath_sendctrl);
+		/* and again, be sure all have hit the chip */
+		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	}
+
+	if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
+	    !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
+	    test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
+		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+		/* only wait so long for intr */
+		dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
+		dd->ipath_sdma_reset_wait = 200;
+		__set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+	}
+bail:;
 }
 
-
-static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+/*
+ * Force an update of in-memory copy of the pioavail registers, when
+ * needed for any of a variety of reasons.  We read the scratch register
+ * to make it highly likely that the update will have happened by the
+ * time we return.  If already off (as in cancel_sends above), this
+ * routine is a nop, on the assumption that the caller will "do the
+ * right thing".
+ */
+void ipath_force_pio_avail_update(struct ipath_devdata *dd)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+	if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
+		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			dd->ipath_sendctrl);
+		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	}
+	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+}
+
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
+				int linitcmd)
+{
+	u64 mod_wd;
 	static const char *what[4] = {
 		[0] = "NOP",
 		[INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
 		[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
 		[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
 	};
-	int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
-			INFINIPATH_IBCC_LINKCMD_MASK;
 
-	ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
-		   "is %s\n", dd->ipath_unit,
-		   what[linkcmd],
-		   ipath_ibcstatus_str[
-			   (ipath_read_kreg64
-			    (dd, dd->ipath_kregs->kr_ibcstatus) >>
-			    INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-			   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
-	/* flush all queued sends when going to DOWN to be sure that
-	 * they don't block MAD packets */
-	if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN)
-		ipath_cancel_sends(dd, 1);
+	if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
+		/*
+		 * If we are told to disable, note that so link-recovery
+		 * code does not attempt to bring us back up.
+		 */
+		preempt_disable();
+		dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
+		preempt_enable();
+	} else if (linitcmd) {
+		/*
+		 * Any other linkinitcmd will lead to LINKDOWN and then
+		 * to INIT (if all is well), so clear flag to let
+		 * link-recovery code attempt to bring us back up.
+		 */
+		preempt_disable();
+		dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
+		preempt_enable();
+	}
+
+	mod_wd = (linkcmd << dd->ibcc_lc_shift) |
+		(linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+	ipath_cdbg(VERBOSE,
+		"Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
+		dd->ipath_unit, what[linkcmd], linitcmd,
+		ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
+			ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
 
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
-			 dd->ipath_ibcctrl | which);
+			 dd->ipath_ibcctrl | mod_wd);
+	/* read from chip so write is flushed */
+	(void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
 }
 
 int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
@@ -1687,30 +1928,28 @@
 
 	switch (newstate) {
 	case IPATH_IB_LINKDOWN_ONLY:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
 		/* don't wait */
 		ret = 0;
 		goto bail;
 
 	case IPATH_IB_LINKDOWN:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+					INFINIPATH_IBCC_LINKINITCMD_POLL);
 		/* don't wait */
 		ret = 0;
 		goto bail;
 
 	case IPATH_IB_LINKDOWN_SLEEP:
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+					INFINIPATH_IBCC_LINKINITCMD_SLEEP);
 		/* don't wait */
 		ret = 0;
 		goto bail;
 
 	case IPATH_IB_LINKDOWN_DISABLE:
-		ipath_set_ib_lstate(dd,
-				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+					INFINIPATH_IBCC_LINKINITCMD_DISABLE);
 		/* don't wait */
 		ret = 0;
 		goto bail;
@@ -1725,8 +1964,8 @@
 			ret = -EINVAL;
 			goto bail;
 		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
+
 		/*
 		 * Since the port can transition to ACTIVE by receiving
 		 * a non VL 15 packet, wait for either state.
@@ -1743,8 +1982,7 @@
 			ret = -EINVAL;
 			goto bail;
 		}
-		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
-				    INFINIPATH_IBCC_LINKCMD_SHIFT);
+		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
 		lstate = IPATH_LINKACTIVE;
 		break;
 
@@ -1753,16 +1991,41 @@
 		dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
 				 dd->ipath_ibcctrl);
+
+		/* turn heartbeat off, as it causes loopback to fail */
+		dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+				       IPATH_IB_HRTBT_OFF);
+		/* don't wait */
 		ret = 0;
-		goto bail; // no state change to wait for
+		goto bail;
 
 	case IPATH_IB_LINK_EXTERNAL:
-		dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
+		dev_info(&dd->pcidev->dev,
+			"Disabling IB local loopback (normal)\n");
+		dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+				       IPATH_IB_HRTBT_ON);
 		dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
 				 dd->ipath_ibcctrl);
+		/* don't wait */
 		ret = 0;
-		goto bail; // no state change to wait for
+		goto bail;
+
+	/*
+	 * Heartbeat can be explicitly enabled by the user via
+	 * "hrtbt_enable" "file", and if disabled, trying to enable here
+	 * will have no effect.  Implicit changes (heartbeat off when
+	 * loopback on, and vice versa) are included to ease testing.
+	 */
+	case IPATH_IB_LINK_HRTBT:
+		ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+			IPATH_IB_HRTBT_ON);
+		goto bail;
+
+	case IPATH_IB_LINK_NO_HRTBT:
+		ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+			IPATH_IB_HRTBT_OFF);
+		goto bail;
 
 	default:
 		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
@@ -1785,7 +2048,7 @@
  * sanity checking on this, and we don't deal with what happens to
  * programs that are already running when the size changes.
  * NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
+ * link INIT state...
  */
 int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
 {
@@ -1800,7 +2063,7 @@
 	 * piosize).  We check that it's one of the valid IB sizes.
 	 */
 	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
-	    arg != 4096) {
+	    (arg != 4096 || !ipath_mtu4096)) {
 		ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
 		ret = -EINVAL;
 		goto bail;
@@ -1816,6 +2079,8 @@
 	if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
 		/* Only if it's not the initial value (or reset to it) */
 		if (piosize != dd->ipath_init_ibmaxlen) {
+			if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
+				piosize = dd->ipath_init_ibmaxlen;
 			dd->ipath_ibmaxlen = piosize;
 			changed = 1;
 		}
@@ -1829,24 +2094,17 @@
 	}
 
 	if (changed) {
+		u64 ibc = dd->ipath_ibcctrl, ibdw;
 		/*
-		 * set the IBC maxpktlength to the size of our pio
-		 * buffers in words
+		 * update our housekeeping variables, and set IBC max
+		 * size, same as init code; max IBC is max we allow in
+		 * buffer, less the qword pbc, plus 1 for ICRC, in dwords
 		 */
-		u64 ibc = dd->ipath_ibcctrl;
+		dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
+		ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
 		ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
-			 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
-
-		piosize = piosize - 2 * sizeof(u32);    /* ignore pbc */
-		dd->ipath_ibmaxlen = piosize;
-		piosize /= sizeof(u32); /* in words */
-		/*
-		 * for ICRC, which we only send in diag test pkt mode, and
-		 * we don't need to worry about that for mtu
-		 */
-		piosize += 1;
-
-		ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+			 dd->ibcc_mpl_shift);
+		ibc |= ibdw << dd->ibcc_mpl_shift;
 		dd->ipath_ibcctrl = ibc;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
 				 dd->ipath_ibcctrl);
@@ -1859,11 +2117,16 @@
 	return ret;
 }
 
-int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
+int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
 {
-	dd->ipath_lid = arg;
+	dd->ipath_lid = lid;
 	dd->ipath_lmc = lmc;
 
+	dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
+		(~((1U << lmc) - 1)) << 16);
+
+	dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
+
 	return 0;
 }
 
@@ -1925,10 +2188,8 @@
 	 * but leave that to per-chip functions.
 	 */
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-	ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		  INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
-	lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
-		 INFINIPATH_IBCS_LINKSTATE_MASK;
+	ltstate = ipath_ib_linktrstate(dd, val);
+	lstate = ipath_ib_linkstate(dd, val);
 
 	dd->ipath_f_setextled(dd, lstate, ltstate);
 	mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
@@ -1969,9 +2230,8 @@
 		dd->ipath_led_override_timer.data = (unsigned long) dd;
 		dd->ipath_led_override_timer.expires = jiffies + 1;
 		add_timer(&dd->ipath_led_override_timer);
-	} else {
+	} else
 		atomic_dec(&dd->ipath_led_override_timer_active);
-	}
 }
 
 /**
@@ -1989,6 +2249,8 @@
 
 	ipath_dbg("Shutting down the device\n");
 
+	ipath_hol_up(dd); /* make sure user processes aren't suspended */
+
 	dd->ipath_flags |= IPATH_LINKUNK;
 	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
 			     IPATH_LINKINIT | IPATH_LINKARMED |
@@ -2003,6 +2265,9 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		teardown_sdma(dd);
+
 	/*
 	 * gracefully stop all sends allowing any in progress to trickle out
 	 * first.
@@ -2020,10 +2285,16 @@
 	 */
 	udelay(5);
 
-	ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
-			    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+	dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
+
+	ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
 	ipath_cancel_sends(dd, 0);
 
+	/*
+	 * we are shutting down, so tell components that care.  We don't do
+	 * this on just a link state change, much like ethernet, a cable
+	 * unplug, etc. doesn't change driver state
+	 */
 	signal_ib_event(dd, IB_EVENT_PORT_ERR);
 
 	/* disable IBC */
@@ -2038,10 +2309,20 @@
 	 */
 	dd->ipath_f_quiet_serdes(dd);
 
+	/* stop all the timers that might still be running */
+	del_timer_sync(&dd->ipath_hol_timer);
 	if (dd->ipath_stats_timer_active) {
 		del_timer_sync(&dd->ipath_stats_timer);
 		dd->ipath_stats_timer_active = 0;
 	}
+	if (dd->ipath_intrchk_timer.data) {
+		del_timer_sync(&dd->ipath_intrchk_timer);
+		dd->ipath_intrchk_timer.data = 0;
+	}
+	if (atomic_read(&dd->ipath_led_override_timer_active)) {
+		del_timer_sync(&dd->ipath_led_override_timer);
+		atomic_set(&dd->ipath_led_override_timer_active, 0);
+	}
 
 	/*
 	 * clear all interrupts and errors, so that the next time the driver
@@ -2115,13 +2396,13 @@
 		ipath_cdbg(VERBOSE, "free closed port %d "
 			   "ipath_port0_skbinfo @ %p\n", pd->port_port,
 			   skbinfo);
-		for (e = 0; e < dd->ipath_rcvegrcnt; e++)
-		if (skbinfo[e].skb) {
-			pci_unmap_single(dd->pcidev, skbinfo[e].phys,
-					 dd->ipath_ibmaxlen,
-					 PCI_DMA_FROMDEVICE);
-			dev_kfree_skb(skbinfo[e].skb);
-		}
+		for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
+			if (skbinfo[e].skb) {
+				pci_unmap_single(dd->pcidev, skbinfo[e].phys,
+						 dd->ipath_ibmaxlen,
+						 PCI_DMA_FROMDEVICE);
+				dev_kfree_skb(skbinfo[e].skb);
+			}
 		vfree(skbinfo);
 	}
 	kfree(pd->port_tid_pg_list);
@@ -2144,6 +2425,7 @@
 	 */
 	idr_init(&unit_table);
 	if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
+		printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n");
 		ret = -ENOMEM;
 		goto bail;
 	}
@@ -2235,13 +2517,18 @@
 			}
 		}
 
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		teardown_sdma(dd);
+
 	dd->ipath_flags &= ~IPATH_INITTED;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
 	ret = dd->ipath_f_reset(dd);
-	if (ret != 1)
-		ipath_dbg("reset was not successful\n");
-	ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
-		  unit);
-	ret = ipath_init_chip(dd, 1);
+	if (ret == 1) {
+		ipath_dbg("Reinitializing unit %u after reset attempt\n",
+			  unit);
+		ret = ipath_init_chip(dd, 1);
+	} else
+		ret = -EAGAIN;
 	if (ret)
 		ipath_dev_err(dd, "Reinitialize unit %u after "
 			      "reset failed with %d\n", unit, ret);
@@ -2253,13 +2540,121 @@
 	return ret;
 }
 
+/*
+ * send a signal to all the processes that have the driver open
+ * through the normal interfaces (i.e., everything other than diags
+ * interface).  Returns number of signalled processes.
+ */
+static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
+{
+	int i, sub, any = 0;
+	pid_t pid;
+
+	if (!dd->ipath_pd)
+		return 0;
+	for (i = 1; i < dd->ipath_cfgports; i++) {
+		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
+		    !dd->ipath_pd[i]->port_pid)
+			continue;
+		pid = dd->ipath_pd[i]->port_pid;
+		dev_info(&dd->pcidev->dev, "context %d in use "
+			  "(PID %u), sending signal %d\n",
+			  i, pid, sig);
+		kill_proc(pid, sig, 1);
+		any++;
+		for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
+			pid = dd->ipath_pd[i]->port_subpid[sub];
+			if (!pid)
+				continue;
+			dev_info(&dd->pcidev->dev, "sub-context "
+				"%d:%d in use (PID %u), sending "
+				"signal %d\n", i, sub, pid, sig);
+			kill_proc(pid, sig, 1);
+			any++;
+		}
+	}
+	return any;
+}
+
+static void ipath_hol_signal_down(struct ipath_devdata *dd)
+{
+	if (ipath_signal_procs(dd, SIGSTOP))
+		ipath_dbg("Stopped some processes\n");
+	ipath_cancel_sends(dd, 1);
+}
+
+
+static void ipath_hol_signal_up(struct ipath_devdata *dd)
+{
+	if (ipath_signal_procs(dd, SIGCONT))
+		ipath_dbg("Continued some processes\n");
+}
+
+/*
+ * link is down, stop any users processes, and flush pending sends
+ * to prevent HoL blocking, then start the HoL timer that
+ * periodically continues, then stop procs, so they can detect
+ * link down if they want, and do something about it.
+ * Timer may already be running, so use __mod_timer, not add_timer.
+ */
+void ipath_hol_down(struct ipath_devdata *dd)
+{
+	dd->ipath_hol_state = IPATH_HOL_DOWN;
+	ipath_hol_signal_down(dd);
+	dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
+	dd->ipath_hol_timer.expires = jiffies +
+		msecs_to_jiffies(ipath_hol_timeout_ms);
+	__mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
+}
+
+/*
+ * link is up, continue any user processes, and ensure timer
+ * is a nop, if running.  Let timer keep running, if set; it
+ * will nop when it sees the link is up
+ */
+void ipath_hol_up(struct ipath_devdata *dd)
+{
+	ipath_hol_signal_up(dd);
+	dd->ipath_hol_state = IPATH_HOL_UP;
+}
+
+/*
+ * toggle the running/not running state of user proceses
+ * to prevent HoL blocking on chip resources, but still allow
+ * user processes to do link down special case handling.
+ * Should only be called via the timer
+ */
+void ipath_hol_event(unsigned long opaque)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+	if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
+		&& dd->ipath_hol_state != IPATH_HOL_UP) {
+		dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
+		ipath_dbg("Stopping processes\n");
+		ipath_hol_signal_down(dd);
+	} else { /* may do "extra" if also in ipath_hol_up() */
+		dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
+		ipath_dbg("Continuing processes\n");
+		ipath_hol_signal_up(dd);
+	}
+	if (dd->ipath_hol_state == IPATH_HOL_UP)
+		ipath_dbg("link's up, don't resched timer\n");
+	else {
+		dd->ipath_hol_timer.expires = jiffies +
+			msecs_to_jiffies(ipath_hol_timeout_ms);
+		__mod_timer(&dd->ipath_hol_timer,
+			dd->ipath_hol_timer.expires);
+	}
+}
+
 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
 {
 	u64 val;
-	if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
+
+	if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
 		return -1;
-	}
-	if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
+	if (dd->ipath_rx_pol_inv != new_pol_inv) {
 		dd->ipath_rx_pol_inv = new_pol_inv;
 		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
 		val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index e28a42f..dc37277 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -62,6 +62,33 @@
  * accessing eeprom contents from within the kernel, only via sysfs.
  */
 
+/* Added functionality for IBA7220-based cards */
+#define IPATH_EEPROM_DEV_V1 0xA0
+#define IPATH_EEPROM_DEV_V2 0xA2
+#define IPATH_TEMP_DEV 0x98
+#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
+#define IPATH_NO_DEV (0xFF)
+
+/*
+ * The number of I2C chains is proliferating. Table below brings
+ * some order to the madness. The basic principle is that the
+ * table is scanned from the top, and a "probe" is made to the
+ * device probe_dev. If that succeeds, the chain is considered
+ * to be of that type, and dd->i2c_chain_type is set to the index+1
+ * of the entry.
+ * The +1 is so static initialization can mean "unknown, do probe."
+ */
+static struct i2c_chain_desc {
+	u8 probe_dev;	/* If seen at probe, chain is this type */
+	u8 eeprom_dev;	/* Dev addr (if any) for EEPROM */
+	u8 temp_dev;	/* Dev Addr (if any) for Temp-sense */
+} i2c_chains[] = {
+	{ IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
+	{ IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
+	{ IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
+	{ IPATH_NO_DEV }
+};
+
 enum i2c_type {
 	i2c_line_scl = 0,
 	i2c_line_sda
@@ -75,13 +102,6 @@
 #define READ_CMD 1
 #define WRITE_CMD 0
 
-static int eeprom_init;
-
-/*
- * The gpioval manipulation really should be protected by spinlocks
- * or be converted to use atomic operations.
- */
-
 /**
  * i2c_gpio_set - set a GPIO line
  * @dd: the infinipath device
@@ -241,6 +261,27 @@
 }
 
 /**
+ * rd_byte - read a byte, leaving ACK, STOP, etc up to caller
+ * @dd: the infinipath device
+ *
+ * Returns byte shifted out of device
+ */
+static int rd_byte(struct ipath_devdata *dd)
+{
+	int bit_cntr, data;
+
+	data = 0;
+
+	for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
+		data <<= 1;
+		scl_out(dd, i2c_line_high);
+		data |= sda_in(dd, 0);
+		scl_out(dd, i2c_line_low);
+	}
+	return data;
+}
+
+/**
  * wr_byte - write a byte, one bit at a time
  * @dd: the infinipath device
  * @data: the byte to write
@@ -331,7 +372,6 @@
 	ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
 		   "is %llx\n", (unsigned long long) *gpioval);
 
-	eeprom_init = 1;
 	/*
 	 * This is to get the i2c into a known state, by first going low,
 	 * then tristate sda (and then tristate scl as first thing
@@ -340,12 +380,17 @@
 	scl_out(dd, i2c_line_low);
 	sda_out(dd, i2c_line_high);
 
+	/* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
 	while (clock_cycles_left--) {
 		scl_out(dd, i2c_line_high);
 
+		/* SDA seen high, issue START by dropping it while SCL high */
 		if (sda_in(dd, 0)) {
 			sda_out(dd, i2c_line_low);
 			scl_out(dd, i2c_line_low);
+			/* ATMEL spec says must be followed by STOP. */
+			scl_out(dd, i2c_line_high);
+			sda_out(dd, i2c_line_high);
 			ret = 0;
 			goto bail;
 		}
@@ -359,29 +404,121 @@
 	return ret;
 }
 
-/**
- * ipath_eeprom_read - receives bytes from the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: address to read from
- * @buffer: where to store result
- * @len: number of bytes to receive
+/*
+ * Probe for I2C device at specified address. Returns 0 for "success"
+ * to match rest of this file.
+ * Leave bus in "reasonable" state for further commands.
  */
+static int i2c_probe(struct ipath_devdata *dd, int devaddr)
+{
+	int ret = 0;
+
+	ret = eeprom_reset(dd);
+	if (ret) {
+		ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
+			      devaddr);
+		return ret;
+	}
+	/*
+	 * Reset no longer leaves bus in start condition, so normal
+	 * i2c_startcmd() will do.
+	 */
+	ret = i2c_startcmd(dd, devaddr | READ_CMD);
+	if (ret)
+		ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
+			   devaddr);
+	else {
+		/*
+		 * Device did respond. Complete a single-byte read, because some
+		 * devices apparently cannot handle STOP immediately after they
+		 * ACK the start-cmd.
+		 */
+		int data;
+		data = rd_byte(dd);
+		stop_cmd(dd);
+		ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
+	}
+	return ret;
+}
+
+/*
+ * Returns the "i2c type". This is a pointer to a struct that describes
+ * the I2C chain on this board. To minimize impact on struct ipath_devdata,
+ * the (small integer) index into the table is actually memoized, rather
+ * then the pointer.
+ * Memoization is because the type is determined on the first call per chip.
+ * An alternative would be to move type determination to early
+ * init code.
+ */
+static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
+{
+	int idx;
+
+	/* Get memoized index, from previous successful probes */
+	idx = dd->ipath_i2c_chain_type - 1;
+	if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
+		goto done;
+
+	idx = 0;
+	while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
+		/* if probe succeeds, this is type */
+		if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
+			break;
+		++idx;
+	}
+
+	/*
+	 * Old EEPROM (first entry) may require a reset after probe,
+	 * rather than being able to "start" after "stop"
+	 */
+	if (idx == 0)
+		eeprom_reset(dd);
+
+	if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
+		idx = -1;
+	else
+		dd->ipath_i2c_chain_type = idx + 1;
+done:
+	return (idx >= 0) ? i2c_chains + idx : NULL;
+}
 
 static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
 					u8 eeprom_offset, void *buffer, int len)
 {
-	/* compiler complains unless initialized */
-	u8 single_byte = 0;
-	int bit_cntr;
 	int ret;
+	struct i2c_chain_desc *icd;
+	u8 *bp = buffer;
 
-	if (!eeprom_init)
-		eeprom_reset(dd);
+	ret = 1;
+	icd = ipath_i2c_type(dd);
+	if (!icd)
+		goto bail;
 
-	eeprom_offset = (eeprom_offset << 1) | READ_CMD;
-
-	if (i2c_startcmd(dd, eeprom_offset)) {
-		ipath_dbg("Failed startcmd\n");
+	if (icd->eeprom_dev == IPATH_NO_DEV) {
+		/* legacy not-really-I2C */
+		ipath_cdbg(VERBOSE, "Start command only address\n");
+		eeprom_offset = (eeprom_offset << 1) | READ_CMD;
+		ret = i2c_startcmd(dd, eeprom_offset);
+	} else {
+		/* Actual I2C */
+		ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
+		if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
+			ipath_dbg("Failed EEPROM startcmd\n");
+			stop_cmd(dd);
+			ret = 1;
+			goto bail;
+		}
+		ret = wr_byte(dd, eeprom_offset);
+		stop_cmd(dd);
+		if (ret) {
+			ipath_dev_err(dd, "Failed to write EEPROM address\n");
+			ret = 1;
+			goto bail;
+		}
+		ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
+	}
+	if (ret) {
+		ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
 		stop_cmd(dd);
 		ret = 1;
 		goto bail;
@@ -392,22 +529,11 @@
 	 * incrementing the address.
 	 */
 	while (len-- > 0) {
-		/* get data */
-		single_byte = 0;
-		for (bit_cntr = 8; bit_cntr; bit_cntr--) {
-			u8 bit;
-			scl_out(dd, i2c_line_high);
-			bit = sda_in(dd, 0);
-			single_byte |= bit << (bit_cntr - 1);
-			scl_out(dd, i2c_line_low);
-		}
-
+		/* get and store data */
+		*bp++ = rd_byte(dd);
 		/* send ack if not the last byte */
 		if (len)
 			send_ack(dd);
-
-		*((u8 *) buffer) = single_byte;
-		buffer++;
 	}
 
 	stop_cmd(dd);
@@ -418,31 +544,40 @@
 	return ret;
 }
 
-
-/**
- * ipath_eeprom_write - writes data to the eeprom via I2C
- * @dd: the infinipath device
- * @eeprom_offset: where to place data
- * @buffer: data to write
- * @len: number of bytes to write
- */
 static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
 				       const void *buffer, int len)
 {
-	u8 single_byte;
 	int sub_len;
 	const u8 *bp = buffer;
 	int max_wait_time, i;
 	int ret;
+	struct i2c_chain_desc *icd;
 
-	if (!eeprom_init)
-		eeprom_reset(dd);
+	ret = 1;
+	icd = ipath_i2c_type(dd);
+	if (!icd)
+		goto bail;
 
 	while (len > 0) {
-		if (i2c_startcmd(dd, (eeprom_offset << 1) | WRITE_CMD)) {
-			ipath_dbg("Failed to start cmd offset %u\n",
-				  eeprom_offset);
-			goto failed_write;
+		if (icd->eeprom_dev == IPATH_NO_DEV) {
+			if (i2c_startcmd(dd,
+					 (eeprom_offset << 1) | WRITE_CMD)) {
+				ipath_dbg("Failed to start cmd offset %u\n",
+					eeprom_offset);
+				goto failed_write;
+			}
+		} else {
+			/* Real I2C */
+			if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
+				ipath_dbg("Failed EEPROM startcmd\n");
+				goto failed_write;
+			}
+			ret = wr_byte(dd, eeprom_offset);
+			if (ret) {
+				ipath_dev_err(dd, "Failed to write EEPROM "
+					      "address\n");
+				goto failed_write;
+			}
 		}
 
 		sub_len = min(len, 4);
@@ -468,9 +603,11 @@
 		 * the writes have completed.   We do this inline to avoid
 		 * the debug prints that are in the real read routine
 		 * if the startcmd fails.
+		 * We also use the proper device address, so it doesn't matter
+		 * whether we have real eeprom_dev. legacy likes any address.
 		 */
 		max_wait_time = 100;
-		while (i2c_startcmd(dd, READ_CMD)) {
+		while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
 			stop_cmd(dd);
 			if (!--max_wait_time) {
 				ipath_dbg("Did not get successful read to "
@@ -478,15 +615,8 @@
 				goto failed_write;
 			}
 		}
-		/* now read the zero byte */
-		for (i = single_byte = 0; i < 8; i++) {
-			u8 bit;
-			scl_out(dd, i2c_line_high);
-			bit = sda_in(dd, 0);
-			scl_out(dd, i2c_line_low);
-			single_byte <<= 1;
-			single_byte |= bit;
-		}
+		/* now read (and ignore) the resulting byte */
+		rd_byte(dd);
 		stop_cmd(dd);
 	}
 
@@ -501,9 +631,12 @@
 	return ret;
 }
 
-/*
- * The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
- * are now just wrappers around the internal functions.
+/**
+ * ipath_eeprom_read - receives bytes from the eeprom via I2C
+ * @dd: the infinipath device
+ * @eeprom_offset: address to read from
+ * @buffer: where to store result
+ * @len: number of bytes to receive
  */
 int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
 			void *buff, int len)
@@ -519,6 +652,13 @@
 	return ret;
 }
 
+/**
+ * ipath_eeprom_write - writes data to the eeprom via I2C
+ * @dd: the infinipath device
+ * @eeprom_offset: where to place data
+ * @buffer: data to write
+ * @len: number of bytes to write
+ */
 int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
 			const void *buff, int len)
 {
@@ -820,7 +960,7 @@
 	 * if we log an hour at 31 minutes, then we would need to set
 	 * active_time to -29 to accurately count the _next_ hour.
 	 */
-	if (new_time > 3600) {
+	if (new_time >= 3600) {
 		new_hrs = new_time / 3600;
 		atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
 		new_hrs += dd->ipath_eep_hrs;
@@ -885,3 +1025,159 @@
 	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
 	return;
 }
+
+static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
+{
+	int ret;
+	struct i2c_chain_desc *icd;
+
+	ret = -ENOENT;
+
+	icd = ipath_i2c_type(dd);
+	if (!icd)
+		goto bail;
+
+	if (icd->temp_dev == IPATH_NO_DEV) {
+		/* tempsense only exists on new, real-I2C boards */
+		ret = -ENXIO;
+		goto bail;
+	}
+
+	if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
+		ipath_dbg("Failed tempsense startcmd\n");
+		stop_cmd(dd);
+		ret = -ENXIO;
+		goto bail;
+	}
+	ret = wr_byte(dd, regnum);
+	stop_cmd(dd);
+	if (ret) {
+		ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
+			      regnum);
+		ret = -ENXIO;
+		goto bail;
+	}
+	if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
+		ipath_dbg("Failed tempsense RD startcmd\n");
+		stop_cmd(dd);
+		ret = -ENXIO;
+		goto bail;
+	}
+	/*
+	 * We can only clock out one byte per command, sensibly
+	 */
+	ret = rd_byte(dd);
+	stop_cmd(dd);
+
+bail:
+	return ret;
+}
+
+#define VALID_TS_RD_REG_MASK 0xBF
+
+/**
+ * ipath_tempsense_read - read register of temp sensor via I2C
+ * @dd: the infinipath device
+ * @regnum: register to read from
+ *
+ * returns reg contents (0..255) or < 0 for error
+ */
+int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
+{
+	int ret;
+
+	if (regnum > 7)
+		return -EINVAL;
+
+	/* return a bogus value for (the one) register we do not have */
+	if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
+		return 0;
+
+	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+	if (!ret) {
+		ret = ipath_tempsense_internal_read(dd, regnum);
+		mutex_unlock(&dd->ipath_eep_lock);
+	}
+
+	/*
+	 * There are three possibilities here:
+	 * ret is actual value (0..255)
+	 * ret is -ENXIO or -EINVAL from code in this file
+	 * ret is -EINTR from mutex_lock_interruptible.
+	 */
+	return ret;
+}
+
+static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
+					  u8 regnum, u8 data)
+{
+	int ret = -ENOENT;
+	struct i2c_chain_desc *icd;
+
+	icd = ipath_i2c_type(dd);
+	if (!icd)
+		goto bail;
+
+	if (icd->temp_dev == IPATH_NO_DEV) {
+		/* tempsense only exists on new, real-I2C boards */
+		ret = -ENXIO;
+		goto bail;
+	}
+	if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
+		ipath_dbg("Failed tempsense startcmd\n");
+		stop_cmd(dd);
+		ret = -ENXIO;
+		goto bail;
+	}
+	ret = wr_byte(dd, regnum);
+	if (ret) {
+		stop_cmd(dd);
+		ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
+			      regnum);
+		ret = -ENXIO;
+		goto bail;
+	}
+	ret = wr_byte(dd, data);
+	stop_cmd(dd);
+	ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
+	if (ret) {
+		ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n",
+			      regnum);
+		ret = -ENXIO;
+	}
+
+bail:
+	return ret;
+}
+
+#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
+
+/**
+ * ipath_tempsense_write - write register of temp sensor via I2C
+ * @dd: the infinipath device
+ * @regnum: register to write
+ * @data: data to write
+ *
+ * returns 0 for success or < 0 for error
+ */
+int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
+{
+	int ret;
+
+	if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+	if (!ret) {
+		ret = ipath_tempsense_internal_write(dd, regnum, data);
+		mutex_unlock(&dd->ipath_eep_lock);
+	}
+
+	/*
+	 * There are three possibilities here:
+	 * ret is 0 for success
+	 * ret is -ENXIO or -EINVAL from code in this file
+	 * ret is -EINTR from mutex_lock_interruptible.
+	 */
+	return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 7e025c8..1e627aa 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -36,21 +36,28 @@
 #include <linux/cdev.h>
 #include <linux/swap.h>
 #include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
 #include <asm/pgtable.h>
 
 #include "ipath_kernel.h"
 #include "ipath_common.h"
+#include "ipath_user_sdma.h"
 
 static int ipath_open(struct inode *, struct file *);
 static int ipath_close(struct inode *, struct file *);
 static ssize_t ipath_write(struct file *, const char __user *, size_t,
 			   loff_t *);
+static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
+			    unsigned long , loff_t);
 static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
 static int ipath_mmap(struct file *, struct vm_area_struct *);
 
 static const struct file_operations ipath_file_ops = {
 	.owner = THIS_MODULE,
 	.write = ipath_write,
+	.aio_write = ipath_writev,
 	.open = ipath_open,
 	.release = ipath_close,
 	.poll = ipath_poll,
@@ -184,6 +191,29 @@
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
 			dd->ipath_palign * kinfo->spi_piocnt * slave;
 	}
+
+	/*
+	 * Set the PIO avail update threshold to no larger
+	 * than the number of buffers per process. Note that
+	 * we decrease it here, but won't ever increase it.
+	 */
+	if (dd->ipath_pioupd_thresh &&
+	    kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
+		unsigned long flags;
+
+		dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
+		ipath_dbg("Decreased pio update threshold to %u\n",
+			dd->ipath_pioupd_thresh);
+		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
+			<< INFINIPATH_S_UPDTHRESH_SHIFT);
+		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+			<< INFINIPATH_S_UPDTHRESH_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			dd->ipath_sendctrl);
+		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	}
+
 	if (shared) {
 		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
 			dd->ipath_ureg_align * pd->port_port;
@@ -219,7 +249,12 @@
 	kinfo->spi_pioalign = dd->ipath_palign;
 
 	kinfo->spi_qpair = IPATH_KD_QP;
-	kinfo->spi_piosize = dd->ipath_ibmaxlen;
+	/*
+	 * user mode PIO buffers are always 2KB, even when 4KB can
+	 * be received, and sent via the kernel; this is ibmaxlen
+	 * for 2K MTU.
+	 */
+	kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
 	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
 	kinfo->spi_port = pd->port_port;
 	kinfo->spi_subport = subport_fp(fp);
@@ -1598,6 +1633,9 @@
 		port_fp(fp) = pd;
 		pd->port_pid = current->pid;
 		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
+		ipath_chg_pioavailkernel(dd,
+			dd->ipath_pbufsport * (pd->port_port - 1),
+			dd->ipath_pbufsport, 0);
 		ipath_stats.sps_ports++;
 		ret = 0;
 	} else
@@ -1760,7 +1798,7 @@
 	for (ndev = 0; ndev < devmax; ndev++) {
 		struct ipath_devdata *dd = ipath_lookup(ndev);
 
-		if (!dd)
+		if (!usable(dd))
 			continue;
 		for (i = 1; i < dd->ipath_cfgports; i++) {
 			struct ipath_portdata *pd = dd->ipath_pd[i];
@@ -1839,10 +1877,9 @@
 	if (ipath_compatible_subports(swmajor, swminor) &&
 	    uinfo->spu_subport_cnt &&
 	    (ret = find_shared_port(fp, uinfo))) {
-		mutex_unlock(&ipath_mutex);
 		if (ret > 0)
 			ret = 0;
-		goto done;
+		goto done_chk_sdma;
 	}
 
 	i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE;
@@ -1854,6 +1891,21 @@
 	else
 		ret = find_best_unit(fp, uinfo);
 
+done_chk_sdma:
+	if (!ret) {
+		struct ipath_filedata *fd = fp->private_data;
+		const struct ipath_portdata *pd = fd->pd;
+		const struct ipath_devdata *dd = pd->port_dd;
+
+		fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
+						      dd->ipath_unit,
+						      pd->port_port,
+						      fd->subport);
+
+		if (!fd->pq)
+			ret = -ENOMEM;
+	}
+
 	mutex_unlock(&ipath_mutex);
 
 done:
@@ -1922,22 +1974,25 @@
 	pd->port_hdrqfull_poll = pd->port_hdrqfull;
 
 	/*
-	 * now enable the port; the tail registers will be written to memory
-	 * by the chip as soon as it sees the write to
-	 * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
-	 * transition from 0 to 1, so clear it first, then set it as part of
-	 * enabling the port.  This will (very briefly) affect any other
-	 * open ports, but it shouldn't be long enough to be an issue.
-	 * We explictly set the in-memory copy to 0 beforehand, so we don't
-	 * have to wait to be sure the DMA update has happened.
+	 * Now enable the port for receive.
+	 * For chips that are set to DMA the tail register to memory
+	 * when they change (and when the update bit transitions from
+	 * 0 to 1.  So for those chips, we turn it off and then back on.
+	 * This will (very briefly) affect any other open ports, but the
+	 * duration is very short, and therefore isn't an issue.  We
+	 * explictly set the in-memory tail copy to 0 beforehand, so we
+	 * don't have to wait to be sure the DMA update has happened
+	 * (chip resets head/tail to 0 on transition to enable).
 	 */
-	if (pd->port_rcvhdrtail_kvaddr)
-		ipath_clear_rcvhdrtail(pd);
 	set_bit(dd->ipath_r_portenable_shift + pd->port_port,
 		&dd->ipath_rcvctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
+		if (pd->port_rcvhdrtail_kvaddr)
+			ipath_clear_rcvhdrtail(pd);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			dd->ipath_rcvctrl &
 			~(1ULL << dd->ipath_r_tailupd_shift));
+	}
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 	/* Notify any waiting slaves */
@@ -1965,14 +2020,15 @@
 	ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
 		   pd->port_port);
 	for (i = port_tidbase; i < maxtid; i++) {
-		if (!dd->ipath_pageshadow[i])
+		struct page *ps = dd->ipath_pageshadow[i];
+
+		if (!ps)
 			continue;
 
+		dd->ipath_pageshadow[i] = NULL;
 		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
 			PAGE_SIZE, PCI_DMA_FROMDEVICE);
-		ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
-						  1);
-		dd->ipath_pageshadow[i] = NULL;
+		ipath_release_user_pages_on_close(&ps, 1);
 		cnt++;
 		ipath_stats.sps_pageunlocks++;
 	}
@@ -2007,6 +2063,13 @@
 		mutex_unlock(&ipath_mutex);
 		goto bail;
 	}
+
+	dd = pd->port_dd;
+
+	/* drain user sdma queue */
+	ipath_user_sdma_queue_drain(dd, fd->pq);
+	ipath_user_sdma_queue_destroy(fd->pq);
+
 	if (--pd->port_cnt) {
 		/*
 		 * XXX If the master closes the port before the slave(s),
@@ -2019,7 +2082,6 @@
 		goto bail;
 	}
 	port = pd->port_port;
-	dd = pd->port_dd;
 
 	if (pd->port_hdrqfull) {
 		ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
@@ -2039,7 +2101,7 @@
 			pd->port_rcvnowait = pd->port_pionowait = 0;
 	}
 	if (pd->port_flag) {
-		ipath_dbg("port %u port_flag still set to 0x%lx\n",
+		ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
 			  pd->port_port, pd->port_flag);
 		pd->port_flag = 0;
 	}
@@ -2076,6 +2138,7 @@
 
 		i = dd->ipath_pbufsport * (port - 1);
 		ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
+		ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1);
 
 		dd->ipath_f_clear_tids(dd, pd->port_port);
 
@@ -2140,17 +2203,31 @@
 	return ret;
 }
 
-static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
+static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
+				   u32 __user *inflightp)
 {
-	unsigned long flags;
+	const u32 val = ipath_user_sdma_inflight_counter(pq);
 
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	if (put_user(val, inflightp))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ipath_sdma_get_complete(struct ipath_devdata *dd,
+				   struct ipath_user_sdma_queue *pq,
+				   u32 __user *completep)
+{
+	u32 val;
+	int err;
+
+	err = ipath_user_sdma_make_progress(dd, pq);
+	if (err < 0)
+		return err;
+
+	val = ipath_user_sdma_complete_counter(pq);
+	if (put_user(val, completep))
+		return -EFAULT;
 
 	return 0;
 }
@@ -2229,6 +2306,16 @@
 		dest = &cmd.cmd.armlaunch_ctrl;
 		src = &ucmd->cmd.armlaunch_ctrl;
 		break;
+	case IPATH_CMD_SDMA_INFLIGHT:
+		copy = sizeof(cmd.cmd.sdma_inflight);
+		dest = &cmd.cmd.sdma_inflight;
+		src = &ucmd->cmd.sdma_inflight;
+		break;
+	case IPATH_CMD_SDMA_COMPLETE:
+		copy = sizeof(cmd.cmd.sdma_complete);
+		dest = &cmd.cmd.sdma_complete;
+		src = &ucmd->cmd.sdma_complete;
+		break;
 	default:
 		ret = -EINVAL;
 		goto bail;
@@ -2299,7 +2386,7 @@
 					   cmd.cmd.slave_mask_addr);
 		break;
 	case IPATH_CMD_PIOAVAILUPD:
-		ret = ipath_force_pio_avail_update(pd->port_dd);
+		ipath_force_pio_avail_update(pd->port_dd);
 		break;
 	case IPATH_CMD_POLL_TYPE:
 		pd->poll_type = cmd.cmd.poll_type;
@@ -2310,6 +2397,17 @@
 		else
 			ipath_disable_armlaunch(pd->port_dd);
 		break;
+	case IPATH_CMD_SDMA_INFLIGHT:
+		ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
+					      (u32 __user *) (unsigned long)
+					      cmd.cmd.sdma_inflight);
+		break;
+	case IPATH_CMD_SDMA_COMPLETE:
+		ret = ipath_sdma_get_complete(pd->port_dd,
+					      user_sdma_queue_fp(fp),
+					      (u32 __user *) (unsigned long)
+					      cmd.cmd.sdma_complete);
+		break;
 	}
 
 	if (ret >= 0)
@@ -2319,6 +2417,20 @@
 	return ret;
 }
 
+static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
+			    unsigned long dim, loff_t off)
+{
+	struct file *filp = iocb->ki_filp;
+	struct ipath_filedata *fp = filp->private_data;
+	struct ipath_portdata *pd = port_fp(filp);
+	struct ipath_user_sdma_queue *pq = fp->pq;
+
+	if (!dim)
+		return -EINVAL;
+
+	return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
+}
+
 static struct class *ipath_class;
 
 static int init_cdev(int minor, char *name, const struct file_operations *fops,
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 9e2ced3..02831ad 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -40,6 +40,7 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/htirq.h>
+#include <rdma/ib_verbs.h>
 
 #include "ipath_kernel.h"
 #include "ipath_registers.h"
@@ -305,7 +306,9 @@
 
 /* kr_intstatus, kr_intclear, kr_intmask bits */
 #define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVURG_SHIFT 0
 #define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVAVAIL_SHIFT 12
 
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 #define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
@@ -476,7 +479,13 @@
 #define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
 			  << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
 
-static int ipath_ht_txe_recover(struct ipath_devdata *);
+static void ipath_ht_txe_recover(struct ipath_devdata *dd)
+{
+	++ipath_stats.sps_txeparity;
+	dev_info(&dd->pcidev->dev,
+		"Recovering from TXE PIO parity error\n");
+}
+
 
 /**
  * ipath_ht_handle_hwerrors - display hardware errors.
@@ -557,11 +566,11 @@
 		 * occur if a processor speculative read is done to the PIO
 		 * buffer while we are sending a packet, for example.
 		 */
-		if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
+		if (hwerrs & TXE_PIO_PARITY) {
+			ipath_ht_txe_recover(dd);
 			hwerrs &= ~TXE_PIO_PARITY;
-		if (hwerrs & RXE_EAGER_PARITY)
-			ipath_dev_err(dd, "RXE parity, Eager TID error is not "
-				"recoverable\n");
+		}
+
 		if (!hwerrs) {
 			ipath_dbg("Clearing freezemode on ignored or "
 				  "recovered hardware error\n");
@@ -735,11 +744,10 @@
 	 */
 	dd->ipath_flags |= IPATH_32BITCOUNTERS;
 	dd->ipath_flags |= IPATH_GPIO_INTR;
-	if (dd->ipath_htspeed != 800)
+	if (dd->ipath_lbus_speed != 800)
 		ipath_dev_err(dd,
 			      "Incorrectly configured for HT @ %uMHz\n",
-			      dd->ipath_htspeed);
-	ret = 0;
+			      dd->ipath_lbus_speed);
 
 	/*
 	 * set here, not in ipath_init_*_funcs because we have to do
@@ -839,7 +847,7 @@
 			/*
 			 * now write them back to clear the error.
 			 */
-			pci_write_config_byte(pdev, link_off,
+			pci_write_config_word(pdev, link_off,
 					      linkctrl & (0xf << 8));
 		}
 	}
@@ -904,7 +912,7 @@
 			break;
 		}
 
-		dd->ipath_htwidth = width;
+		dd->ipath_lbus_width = width;
 
 		if (linkwidth != 0x11) {
 			ipath_dev_err(dd, "Not configured for 16 bit HT "
@@ -952,8 +960,13 @@
 			speed = 200;
 			break;
 		}
-		dd->ipath_htspeed = speed;
+		dd->ipath_lbus_speed = speed;
 	}
+
+	snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
+		"HyperTransport,%uMHz,x%u\n",
+		dd->ipath_lbus_speed,
+		dd->ipath_lbus_width);
 }
 
 static int ipath_ht_intconfig(struct ipath_devdata *dd)
@@ -1653,22 +1666,6 @@
 }
 
 
-static int ipath_ht_txe_recover(struct ipath_devdata *dd)
-{
-	int cnt = ++ipath_stats.sps_txeparity;
-	if (cnt >= IPATH_MAX_PARITY_ATTEMPTS)  {
-		if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
-			ipath_dev_err(dd,
-				"Too many attempts to recover from "
-				"TXE parity, giving up\n");
-		return 0;
-	}
-	dev_info(&dd->pcidev->dev,
-		"Recovering from TXE PIO parity error\n");
-	return 1;
-}
-
-
 /**
  * ipath_init_ht_get_base_info - set chip-specific flags for user code
  * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index c7a2f50..421cc2a 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -38,7 +38,7 @@
 #include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
-
+#include <rdma/ib_verbs.h>
 
 #include "ipath_kernel.h"
 #include "ipath_registers.h"
@@ -311,9 +311,14 @@
 	.cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
 };
 
+/* kr_control bits */
+#define INFINIPATH_C_RESET 1U
+
 /* kr_intstatus, kr_intclear, kr_intmask bits */
 #define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVURG_SHIFT 0
 #define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVAVAIL_SHIFT 12
 
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 #define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK  0x000000000000003fULL
@@ -338,6 +343,9 @@
 #define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
 #define INFINIPATH_EXTS_MEMBIST_FOUND       0x0000000000008000
 
+/* kr_xgxsconfig bits */
+#define INFINIPATH_XGXS_RESET          0x5ULL
+
 #define _IPATH_GPIO_SDA_NUM 1
 #define _IPATH_GPIO_SCL_NUM 0
 
@@ -346,6 +354,16 @@
 #define IPATH_GPIO_SCL (1ULL << \
 	(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
 
+#define INFINIPATH_RT_BUFSIZE_MASK 0xe0000000ULL
+#define INFINIPATH_RT_BUFSIZE_SHIFTVAL(tid) \
+	((((tid) & INFINIPATH_RT_BUFSIZE_MASK) >> 29) + 11 - 1)
+#define INFINIPATH_RT_BUFSIZE(tid) (1 << INFINIPATH_RT_BUFSIZE_SHIFTVAL(tid))
+#define INFINIPATH_RT_IS_VALID(tid) \
+	(((tid) & INFINIPATH_RT_BUFSIZE_MASK) && \
+	 ((((tid) & INFINIPATH_RT_BUFSIZE_MASK) != INFINIPATH_RT_BUFSIZE_MASK)))
+#define INFINIPATH_RT_ADDR_MASK 0x1FFFFFFFULL /* 29 bits valid */
+#define INFINIPATH_RT_ADDR_SHIFT 10
+
 #define INFINIPATH_R_INTRAVAIL_SHIFT 16
 #define INFINIPATH_R_TAILUPD_SHIFT 31
 
@@ -372,6 +390,8 @@
 #define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
 		        INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
 		        << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
+			  << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
 
 static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *,
 			       u32, unsigned long);
@@ -450,10 +470,8 @@
 	 * make sure we get this much out, unless told to be quiet,
 	 * or it's occurred within the last 5 seconds
 	 */
-	if ((hwerrs & ~(dd->ipath_lasthwerror |
-			((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
-			  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
-			 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
+	if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
+			RXE_EAGER_PARITY)) ||
 	    (ipath_debug & __IPATH_VERBDBG))
 		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
 			 "(cleared)\n", (unsigned long long) hwerrs);
@@ -465,7 +483,7 @@
 			      (hwerrs & ~dd->ipath_hwe_bitsextant));
 
 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
-	if (ctrl & INFINIPATH_C_FREEZEMODE) {
+	if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
 		/*
 		 * parity errors in send memory are recoverable,
 		 * just cancel the send (if indicated in * sendbuffererror),
@@ -540,12 +558,40 @@
 				 dd->ipath_hwerrmask);
 	}
 
-	if (*msg)
-		ipath_dev_err(dd, "%s hardware error\n", msg);
-	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
+	if (hwerrs) {
 		/*
-		 * for /sys status file ; if no trailing } is copied, we'll
-		 * know it was truncated.
+		 * if any set that we aren't ignoring; only
+		 * make the complaint once, in case it's stuck
+		 * or recurring, and we get here multiple
+		 * times.
+		 */
+		ipath_dev_err(dd, "%s hardware error\n", msg);
+		if (dd->ipath_flags & IPATH_INITTED) {
+			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+			ipath_setup_pe_setextled(dd,
+				INFINIPATH_IBCS_L_STATE_DOWN,
+				INFINIPATH_IBCS_LT_STATE_DISABLED);
+			ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+					  "mode), no longer usable, SN %.16s\n",
+					  dd->ipath_serial);
+			isfatal = 1;
+		}
+		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+		/* mark as having had error */
+		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+		/*
+		 * mark as not usable, at a minimum until driver
+		 * is reloaded, probably until reboot, since no
+		 * other reset is possible.
+		 */
+		dd->ipath_flags &= ~IPATH_INITTED;
+	} else
+		*msg = 0; /* recovered from all of them */
+
+	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg && msg) {
+		/*
+		 * for /sys status file ; if no trailing brace is copied,
+		 * we'll know it was truncated.
 		 */
 		snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
 			 "{%s}", msg);
@@ -610,7 +656,6 @@
 			dd->ipath_f_put_tid = ipath_pe_put_tid_2;
 	}
 
-
 	/*
 	 * set here, not in ipath_init_*_funcs because we have to do
 	 * it after we can read chip registers.
@@ -838,7 +883,7 @@
 	extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
 				       INFINIPATH_EXTC_LED2PRIPORT_ON);
 
-	if (ltst & INFINIPATH_IBCS_LT_STATE_LINKUP)
+	if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
 		extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
 	if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
 		extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
@@ -863,6 +908,62 @@
 	pci_disable_msi(dd->pcidev);
 }
 
+static void ipath_6120_pcie_params(struct ipath_devdata *dd)
+{
+	u16 linkstat, speed;
+	int pos;
+
+	pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
+	if (!pos) {
+		ipath_dev_err(dd, "Can't find PCI Express capability!\n");
+		goto bail;
+	}
+
+	pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
+			     &linkstat);
+	/*
+	 * speed is bits 0-4, linkwidth is bits 4-8
+	 * no defines for them in headers
+	 */
+	speed = linkstat & 0xf;
+	linkstat >>= 4;
+	linkstat &= 0x1f;
+	dd->ipath_lbus_width = linkstat;
+
+	switch (speed) {
+	case 1:
+		dd->ipath_lbus_speed = 2500; /* Gen1, 2.5GHz */
+		break;
+	case 2:
+		dd->ipath_lbus_speed = 5000; /* Gen1, 5GHz */
+		break;
+	default: /* not defined, assume gen1 */
+		dd->ipath_lbus_speed = 2500;
+		break;
+	}
+
+	if (linkstat < 8)
+		ipath_dev_err(dd,
+			"PCIe width %u (x8 HCA), performance reduced\n",
+			linkstat);
+	else
+		ipath_cdbg(VERBOSE, "PCIe speed %u width %u (x8 HCA)\n",
+			dd->ipath_lbus_speed, linkstat);
+
+	if (speed != 1)
+		ipath_dev_err(dd,
+			"PCIe linkspeed %u is incorrect; "
+			"should be 1 (2500)!\n", speed);
+bail:
+	/* fill in string, even on errors */
+	snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
+		"PCIe,%uMHz,x%u\n",
+		dd->ipath_lbus_speed,
+		dd->ipath_lbus_width);
+
+	return;
+}
+
 /**
  * ipath_setup_pe_config - setup PCIe config related stuff
  * @dd: the infinipath device
@@ -920,19 +1021,8 @@
 	} else
 		ipath_dev_err(dd, "Can't find MSI capability, "
 			      "can't save MSI settings for reset\n");
-	if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP))) {
-		u16 linkstat;
-		pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
-				     &linkstat);
-		linkstat >>= 4;
-		linkstat &= 0x1f;
-		if (linkstat != 8)
-			ipath_dev_err(dd, "PCIe width %u, "
-				      "performance reduced\n", linkstat);
-	}
-	else
-		ipath_dev_err(dd, "Can't find PCI Express "
-			      "capability!\n");
+
+	ipath_6120_pcie_params(dd);
 
 	dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
 	dd->ipath_link_speed_supported = IPATH_IB_SDR;
@@ -1065,10 +1155,7 @@
 		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
 		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
 
-	dd->ipath_eep_st_masks[2].errs_to_log =
-		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
-
-
+	dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
 	dd->delay_mult = 2; /* SDR, 4X, can't change */
 }
 
@@ -1142,6 +1229,9 @@
 	u64 val;
 	int i;
 	int ret;
+	u16 cmdval;
+
+	pci_read_config_word(dd->pcidev, PCI_COMMAND, &cmdval);
 
 	/* Use ERROR so it shows up in logs, etc. */
 	ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
@@ -1169,10 +1259,14 @@
 			ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n",
 				      r);
 		/* now re-enable memory access */
+		pci_write_config_word(dd->pcidev, PCI_COMMAND, cmdval);
 		if ((r = pci_enable_device(dd->pcidev)))
 			ipath_dev_err(dd, "pci_enable_device failed after "
 				      "reset: %d\n", r);
-		/* whether it worked or not, mark as present, again */
+		/*
+		 * whether it fully enabled or not, mark as present,
+		 * again (but not INITTED)
+		 */
 		dd->ipath_flags |= IPATH_PRESENT;
 		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
 		if (val == dd->ipath_revision) {
@@ -1190,6 +1284,8 @@
 	ret = 0; /* failed */
 
 bail:
+	if (ret)
+		ipath_6120_pcie_params(dd);
 	return ret;
 }
 
@@ -1209,16 +1305,21 @@
 {
 	u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
 	unsigned long flags = 0; /* keep gcc quiet */
+	int tidx;
+	spinlock_t *tidlockp;
+
+	if (!dd->ipath_kregbase)
+		return;
 
 	if (pa != dd->ipath_tidinvalid) {
 		if (pa & ((1U << 11) - 1)) {
 			dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
-				 "not 4KB aligned!\n", pa);
+				 "not 2KB aligned!\n", pa);
 			return;
 		}
 		pa >>= 11;
 		/* paranoia check */
-		if (pa & (7<<29))
+		if (pa & ~INFINIPATH_RT_ADDR_MASK)
 			ipath_dev_err(dd,
 				      "BUG: Physical page address 0x%lx "
 				      "has bits set in 31-29\n", pa);
@@ -1238,14 +1339,22 @@
 	 * call can be done from interrupt level for the port 0 eager TIDs,
 	 * so we have to use irqsave locks.
 	 */
-	spin_lock_irqsave(&dd->ipath_tid_lock, flags);
+	/*
+	 * Assumes tidptr always > ipath_egrtidbase
+	 * if type == RCVHQ_RCV_TYPE_EAGER.
+	 */
+	tidx = tidptr - dd->ipath_egrtidbase;
+
+	tidlockp = (type == RCVHQ_RCV_TYPE_EAGER && tidx < dd->ipath_rcvegrcnt)
+		? &dd->ipath_kernel_tid_lock : &dd->ipath_user_tid_lock;
+	spin_lock_irqsave(tidlockp, flags);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
-	if (dd->ipath_kregbase)
-		writel(pa, tidp32);
+	writel(pa, tidp32);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef);
 	mmiowb();
-	spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
+	spin_unlock_irqrestore(tidlockp, flags);
 }
+
 /**
  * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
  * @dd: the infinipath device
@@ -1261,6 +1370,10 @@
 			     u32 type, unsigned long pa)
 {
 	u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+	u32 tidx;
+
+	if (!dd->ipath_kregbase)
+		return;
 
 	if (pa != dd->ipath_tidinvalid) {
 		if (pa & ((1U << 11) - 1)) {
@@ -1270,7 +1383,7 @@
 		}
 		pa >>= 11;
 		/* paranoia check */
-		if (pa & (7<<29))
+		if (pa & ~INFINIPATH_RT_ADDR_MASK)
 			ipath_dev_err(dd,
 				      "BUG: Physical page address 0x%lx "
 				      "has bits set in 31-29\n", pa);
@@ -1280,8 +1393,8 @@
 		else /* for now, always full 4KB page */
 			pa |= 2 << 29;
 	}
-	if (dd->ipath_kregbase)
-		writel(pa, tidp32);
+	tidx = tidptr - dd->ipath_egrtidbase;
+	writel(pa, tidp32);
 	mmiowb();
 }
 
@@ -1379,17 +1492,13 @@
 	dd->ipath_egrtidbase = (u64 __iomem *)
 		((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase);
 
-	/*
-	 * To truly support a 4KB MTU (for usermode), we need to
-	 * bump this to a larger value.  For now, we use them for
-	 * the kernel only.
-	 */
-	dd->ipath_rcvegrbufsize = 2048;
+	dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048;
 	/*
 	 * the min() check here is currently a nop, but it may not always
 	 * be, depending on just how we do ipath_rcvegrbufsize
 	 */
-	dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
+	dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k :
+				 dd->ipath_piosize2k,
 				 dd->ipath_rcvegrbufsize +
 				 (dd->ipath_rcvhdrentsize << 2));
 	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
new file mode 100644
index 0000000..1b2de2c
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -0,0 +1,2571 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the
+ * InfiniPath 7220 chip (except that specific to the SerDes)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <rdma/ib_verbs.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+#include "ipath_7220.h"
+
+static void ipath_setup_7220_setextled(struct ipath_devdata *, u64, u64);
+
+static unsigned ipath_compat_ddr_negotiate = 1;
+
+module_param_named(compat_ddr_negotiate, ipath_compat_ddr_negotiate, uint,
+			S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(compat_ddr_negotiate,
+		"Attempt pre-IBTA 1.2 DDR speed negotiation");
+
+static unsigned ipath_sdma_fetch_arb = 1;
+module_param_named(fetch_arb, ipath_sdma_fetch_arb, uint, S_IRUGO);
+MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration");
+
+/*
+ * This file contains almost all the chip-specific register information and
+ * access functions for the QLogic InfiniPath 7220 PCI-Express chip, with the
+ * exception of SerDes support, which in in ipath_sd7220.c.
+ *
+ * This lists the InfiniPath registers, in the actual chip layout.
+ * This structure should never be directly accessed.
+ */
+struct _infinipath_do_not_use_kernel_regs {
+	unsigned long long Revision;
+	unsigned long long Control;
+	unsigned long long PageAlign;
+	unsigned long long PortCnt;
+	unsigned long long DebugPortSelect;
+	unsigned long long DebugSigsIntSel; /* was Reserved0;*/
+	unsigned long long SendRegBase;
+	unsigned long long UserRegBase;
+	unsigned long long CounterRegBase;
+	unsigned long long Scratch;
+	unsigned long long EEPROMAddrCmd; /* was Reserved1; */
+	unsigned long long EEPROMData; /* was Reserved2; */
+	unsigned long long IntBlocked;
+	unsigned long long IntMask;
+	unsigned long long IntStatus;
+	unsigned long long IntClear;
+	unsigned long long ErrorMask;
+	unsigned long long ErrorStatus;
+	unsigned long long ErrorClear;
+	unsigned long long HwErrMask;
+	unsigned long long HwErrStatus;
+	unsigned long long HwErrClear;
+	unsigned long long HwDiagCtrl;
+	unsigned long long MDIO;
+	unsigned long long IBCStatus;
+	unsigned long long IBCCtrl;
+	unsigned long long ExtStatus;
+	unsigned long long ExtCtrl;
+	unsigned long long GPIOOut;
+	unsigned long long GPIOMask;
+	unsigned long long GPIOStatus;
+	unsigned long long GPIOClear;
+	unsigned long long RcvCtrl;
+	unsigned long long RcvBTHQP;
+	unsigned long long RcvHdrSize;
+	unsigned long long RcvHdrCnt;
+	unsigned long long RcvHdrEntSize;
+	unsigned long long RcvTIDBase;
+	unsigned long long RcvTIDCnt;
+	unsigned long long RcvEgrBase;
+	unsigned long long RcvEgrCnt;
+	unsigned long long RcvBufBase;
+	unsigned long long RcvBufSize;
+	unsigned long long RxIntMemBase;
+	unsigned long long RxIntMemSize;
+	unsigned long long RcvPartitionKey;
+	unsigned long long RcvQPMulticastPort;
+	unsigned long long RcvPktLEDCnt;
+	unsigned long long IBCDDRCtrl;
+	unsigned long long HRTBT_GUID;
+	unsigned long long IB_SDTEST_IF_TX;
+	unsigned long long IB_SDTEST_IF_RX;
+	unsigned long long IBCDDRCtrl2;
+	unsigned long long IBCDDRStatus;
+	unsigned long long JIntReload;
+	unsigned long long IBNCModeCtrl;
+	unsigned long long SendCtrl;
+	unsigned long long SendBufBase;
+	unsigned long long SendBufSize;
+	unsigned long long SendBufCnt;
+	unsigned long long SendAvailAddr;
+	unsigned long long TxIntMemBase;
+	unsigned long long TxIntMemSize;
+	unsigned long long SendDmaBase;
+	unsigned long long SendDmaLenGen;
+	unsigned long long SendDmaTail;
+	unsigned long long SendDmaHead;
+	unsigned long long SendDmaHeadAddr;
+	unsigned long long SendDmaBufMask0;
+	unsigned long long SendDmaBufMask1;
+	unsigned long long SendDmaBufMask2;
+	unsigned long long SendDmaStatus;
+	unsigned long long SendBufferError;
+	unsigned long long SendBufferErrorCONT1;
+	unsigned long long SendBufErr2; /* was Reserved6SBE[0/6] */
+	unsigned long long Reserved6L[2];
+	unsigned long long AvailUpdCount;
+	unsigned long long RcvHdrAddr0;
+	unsigned long long RcvHdrAddrs[16]; /* Why enumerate? */
+	unsigned long long Reserved7hdtl; /* Align next to 300 */
+	unsigned long long RcvHdrTailAddr0; /* 300, like others */
+	unsigned long long RcvHdrTailAddrs[16];
+	unsigned long long Reserved9SW[7]; /* was [8]; we have 17 ports */
+	unsigned long long IbsdEpbAccCtl; /* IB Serdes EPB access control */
+	unsigned long long IbsdEpbTransReg; /* IB Serdes EPB Transaction */
+	unsigned long long Reserved10sds; /* was SerdesStatus on */
+	unsigned long long XGXSConfig;
+	unsigned long long IBSerDesCtrl; /* Was IBPLLCfg on Monty */
+	unsigned long long EEPCtlStat; /* for "boot" EEPROM/FLASH */
+	unsigned long long EEPAddrCmd;
+	unsigned long long EEPData;
+	unsigned long long PcieEpbAccCtl;
+	unsigned long long PcieEpbTransCtl;
+	unsigned long long EfuseCtl; /* E-Fuse control */
+	unsigned long long EfuseData[4];
+	unsigned long long ProcMon;
+	/* this chip moves following two from previous 200, 208 */
+	unsigned long long PCIeRBufTestReg0;
+	unsigned long long PCIeRBufTestReg1;
+	/* added for this chip */
+	unsigned long long PCIeRBufTestReg2;
+	unsigned long long PCIeRBufTestReg3;
+	/* added for this chip, debug only */
+	unsigned long long SPC_JTAG_ACCESS_REG;
+	unsigned long long LAControlReg;
+	unsigned long long GPIODebugSelReg;
+	unsigned long long DebugPortValueReg;
+	/* added for this chip, DMA */
+	unsigned long long SendDmaBufUsed[3];
+	unsigned long long SendDmaReqTagUsed;
+	/*
+	 * added for this chip, EFUSE: note that these program 64-bit
+	 * words 2 and 3 */
+	unsigned long long efuse_pgm_data[2];
+	unsigned long long Reserved11LAalign[10]; /* Skip 4B0..4F8 */
+	/* we have 30 regs for DDS and RXEQ in IB SERDES */
+	unsigned long long SerDesDDSRXEQ[30];
+	unsigned long long Reserved12LAalign[2]; /* Skip 5F0, 5F8 */
+	/* added for LA debug support */
+	unsigned long long LAMemory[32];
+};
+
+struct _infinipath_do_not_use_counters {
+	__u64 LBIntCnt;
+	__u64 LBFlowStallCnt;
+	__u64 TxSDmaDescCnt;	/* was Reserved1 */
+	__u64 TxUnsupVLErrCnt;
+	__u64 TxDataPktCnt;
+	__u64 TxFlowPktCnt;
+	__u64 TxDwordCnt;
+	__u64 TxLenErrCnt;
+	__u64 TxMaxMinLenErrCnt;
+	__u64 TxUnderrunCnt;
+	__u64 TxFlowStallCnt;
+	__u64 TxDroppedPktCnt;
+	__u64 RxDroppedPktCnt;
+	__u64 RxDataPktCnt;
+	__u64 RxFlowPktCnt;
+	__u64 RxDwordCnt;
+	__u64 RxLenErrCnt;
+	__u64 RxMaxMinLenErrCnt;
+	__u64 RxICRCErrCnt;
+	__u64 RxVCRCErrCnt;
+	__u64 RxFlowCtrlErrCnt;
+	__u64 RxBadFormatCnt;
+	__u64 RxLinkProblemCnt;
+	__u64 RxEBPCnt;
+	__u64 RxLPCRCErrCnt;
+	__u64 RxBufOvflCnt;
+	__u64 RxTIDFullErrCnt;
+	__u64 RxTIDValidErrCnt;
+	__u64 RxPKeyMismatchCnt;
+	__u64 RxP0HdrEgrOvflCnt;
+	__u64 RxP1HdrEgrOvflCnt;
+	__u64 RxP2HdrEgrOvflCnt;
+	__u64 RxP3HdrEgrOvflCnt;
+	__u64 RxP4HdrEgrOvflCnt;
+	__u64 RxP5HdrEgrOvflCnt;
+	__u64 RxP6HdrEgrOvflCnt;
+	__u64 RxP7HdrEgrOvflCnt;
+	__u64 RxP8HdrEgrOvflCnt;
+	__u64 RxP9HdrEgrOvflCnt;	/* was Reserved6 */
+	__u64 RxP10HdrEgrOvflCnt;	/* was Reserved7 */
+	__u64 RxP11HdrEgrOvflCnt;	/* new for IBA7220 */
+	__u64 RxP12HdrEgrOvflCnt;	/* new for IBA7220 */
+	__u64 RxP13HdrEgrOvflCnt;	/* new for IBA7220 */
+	__u64 RxP14HdrEgrOvflCnt;	/* new for IBA7220 */
+	__u64 RxP15HdrEgrOvflCnt;	/* new for IBA7220 */
+	__u64 RxP16HdrEgrOvflCnt;	/* new for IBA7220 */
+	__u64 IBStatusChangeCnt;
+	__u64 IBLinkErrRecoveryCnt;
+	__u64 IBLinkDownedCnt;
+	__u64 IBSymbolErrCnt;
+	/* The following are new for IBA7220 */
+	__u64 RxVL15DroppedPktCnt;
+	__u64 RxOtherLocalPhyErrCnt;
+	__u64 PcieRetryBufDiagQwordCnt;
+	__u64 ExcessBufferOvflCnt;
+	__u64 LocalLinkIntegrityErrCnt;
+	__u64 RxVlErrCnt;
+	__u64 RxDlidFltrCnt;
+	__u64 Reserved8[7];
+	__u64 PSStat;
+	__u64 PSStart;
+	__u64 PSInterval;
+	__u64 PSRcvDataCount;
+	__u64 PSRcvPktsCount;
+	__u64 PSXmitDataCount;
+	__u64 PSXmitPktsCount;
+	__u64 PSXmitWaitCount;
+};
+
+#define IPATH_KREG_OFFSET(field) (offsetof( \
+	struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
+#define IPATH_CREG_OFFSET(field) (offsetof( \
+	struct _infinipath_do_not_use_counters, field) / sizeof(u64))
+
+static const struct ipath_kregs ipath_7220_kregs = {
+	.kr_control = IPATH_KREG_OFFSET(Control),
+	.kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
+	.kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
+	.kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
+	.kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
+	.kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
+	.kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
+	.kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
+	.kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
+	.kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
+	.kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
+	.kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
+	.kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
+	.kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
+	.kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
+	.kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
+	.kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
+	.kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
+	.kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
+	.kr_intclear = IPATH_KREG_OFFSET(IntClear),
+	.kr_intmask = IPATH_KREG_OFFSET(IntMask),
+	.kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
+	.kr_mdio = IPATH_KREG_OFFSET(MDIO),
+	.kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
+	.kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
+	.kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
+	.kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
+	.kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
+	.kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
+	.kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
+	.kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
+	.kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
+	.kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
+	.kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
+	.kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
+	.kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
+	.kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
+	.kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
+	.kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
+	.kr_revision = IPATH_KREG_OFFSET(Revision),
+	.kr_scratch = IPATH_KREG_OFFSET(Scratch),
+	.kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
+	.kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
+	.kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendAvailAddr),
+	.kr_sendpiobufbase = IPATH_KREG_OFFSET(SendBufBase),
+	.kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendBufCnt),
+	.kr_sendpiosize = IPATH_KREG_OFFSET(SendBufSize),
+	.kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
+	.kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
+	.kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
+	.kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
+
+	.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
+
+	/* send dma related regs */
+	.kr_senddmabase = IPATH_KREG_OFFSET(SendDmaBase),
+	.kr_senddmalengen = IPATH_KREG_OFFSET(SendDmaLenGen),
+	.kr_senddmatail = IPATH_KREG_OFFSET(SendDmaTail),
+	.kr_senddmahead = IPATH_KREG_OFFSET(SendDmaHead),
+	.kr_senddmaheadaddr = IPATH_KREG_OFFSET(SendDmaHeadAddr),
+	.kr_senddmabufmask0 = IPATH_KREG_OFFSET(SendDmaBufMask0),
+	.kr_senddmabufmask1 = IPATH_KREG_OFFSET(SendDmaBufMask1),
+	.kr_senddmabufmask2 = IPATH_KREG_OFFSET(SendDmaBufMask2),
+	.kr_senddmastatus = IPATH_KREG_OFFSET(SendDmaStatus),
+
+	/* SerDes related regs */
+	.kr_ibserdesctrl = IPATH_KREG_OFFSET(IBSerDesCtrl),
+	.kr_ib_epbacc = IPATH_KREG_OFFSET(IbsdEpbAccCtl),
+	.kr_ib_epbtrans = IPATH_KREG_OFFSET(IbsdEpbTransReg),
+	.kr_pcie_epbacc = IPATH_KREG_OFFSET(PcieEpbAccCtl),
+	.kr_pcie_epbtrans = IPATH_KREG_OFFSET(PcieEpbTransCtl),
+	.kr_ib_ddsrxeq = IPATH_KREG_OFFSET(SerDesDDSRXEQ),
+
+	/*
+	 * These should not be used directly via ipath_read_kreg64(),
+	 * use them with ipath_read_kreg64_port()
+	 */
+	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
+	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
+
+	/*
+	 * The rcvpktled register controls one of the debug port signals, so
+	 * a packet activity LED can be connected to it.
+	 */
+	.kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
+	.kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
+	.kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
+
+	.kr_hrtbt_guid = IPATH_KREG_OFFSET(HRTBT_GUID),
+	.kr_ibcddrctrl = IPATH_KREG_OFFSET(IBCDDRCtrl),
+	.kr_ibcddrstatus = IPATH_KREG_OFFSET(IBCDDRStatus),
+	.kr_jintreload = IPATH_KREG_OFFSET(JIntReload)
+};
+
+static const struct ipath_cregs ipath_7220_cregs = {
+	.cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
+	.cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
+	.cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
+	.cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
+	.cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
+	.cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
+	.cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
+	.cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
+	.cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
+	.cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
+	.cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
+	.cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
+	.cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
+	.cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
+	.cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
+	.cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
+	.cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
+	.cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
+	.cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
+	.cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
+	.cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
+	.cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
+	.cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
+	.cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
+	.cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
+	.cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
+	.cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
+	.cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
+	.cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
+	.cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
+	.cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
+	.cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
+	.cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt),
+	.cr_vl15droppedpktcnt = IPATH_CREG_OFFSET(RxVL15DroppedPktCnt),
+	.cr_rxotherlocalphyerrcnt =
+		IPATH_CREG_OFFSET(RxOtherLocalPhyErrCnt),
+	.cr_excessbufferovflcnt = IPATH_CREG_OFFSET(ExcessBufferOvflCnt),
+	.cr_locallinkintegrityerrcnt =
+		IPATH_CREG_OFFSET(LocalLinkIntegrityErrCnt),
+	.cr_rxvlerrcnt = IPATH_CREG_OFFSET(RxVlErrCnt),
+	.cr_rxdlidfltrcnt = IPATH_CREG_OFFSET(RxDlidFltrCnt),
+	.cr_psstat = IPATH_CREG_OFFSET(PSStat),
+	.cr_psstart = IPATH_CREG_OFFSET(PSStart),
+	.cr_psinterval = IPATH_CREG_OFFSET(PSInterval),
+	.cr_psrcvdatacount = IPATH_CREG_OFFSET(PSRcvDataCount),
+	.cr_psrcvpktscount = IPATH_CREG_OFFSET(PSRcvPktsCount),
+	.cr_psxmitdatacount = IPATH_CREG_OFFSET(PSXmitDataCount),
+	.cr_psxmitpktscount = IPATH_CREG_OFFSET(PSXmitPktsCount),
+	.cr_psxmitwaitcount = IPATH_CREG_OFFSET(PSXmitWaitCount),
+};
+
+/* kr_control bits */
+#define INFINIPATH_C_RESET (1U<<7)
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define INFINIPATH_I_RCVURG_MASK ((1ULL<<17)-1)
+#define INFINIPATH_I_RCVURG_SHIFT 32
+#define INFINIPATH_I_RCVAVAIL_MASK ((1ULL<<17)-1)
+#define INFINIPATH_I_RCVAVAIL_SHIFT 0
+#define INFINIPATH_I_SERDESTRIMDONE (1ULL<<27)
+
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK  0x00000000000000ffULL
+#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
+#define INFINIPATH_HWE_PCIEPOISONEDTLP      0x0000000010000000ULL
+#define INFINIPATH_HWE_PCIECPLTIMEOUT       0x0000000020000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYXTLH    0x0000000040000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYXADM    0x0000000080000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYRADM    0x0000000100000000ULL
+#define INFINIPATH_HWE_COREPLL_FBSLIP       0x0080000000000000ULL
+#define INFINIPATH_HWE_COREPLL_RFSLIP       0x0100000000000000ULL
+#define INFINIPATH_HWE_PCIE1PLLFAILED       0x0400000000000000ULL
+#define INFINIPATH_HWE_PCIE0PLLFAILED       0x0800000000000000ULL
+#define INFINIPATH_HWE_SERDESPLLFAILED      0x1000000000000000ULL
+/* specific to this chip */
+#define INFINIPATH_HWE_PCIECPLDATAQUEUEERR         0x0000000000000040ULL
+#define INFINIPATH_HWE_PCIECPLHDRQUEUEERR          0x0000000000000080ULL
+#define INFINIPATH_HWE_SDMAMEMREADERR              0x0000000010000000ULL
+#define INFINIPATH_HWE_CLK_UC_PLLNOTLOCKED	   0x2000000000000000ULL
+#define INFINIPATH_HWE_PCIESERDESQ0PCLKNOTDETECT   0x0100000000000000ULL
+#define INFINIPATH_HWE_PCIESERDESQ1PCLKNOTDETECT   0x0200000000000000ULL
+#define INFINIPATH_HWE_PCIESERDESQ2PCLKNOTDETECT   0x0400000000000000ULL
+#define INFINIPATH_HWE_PCIESERDESQ3PCLKNOTDETECT   0x0800000000000000ULL
+#define INFINIPATH_HWE_DDSRXEQMEMORYPARITYERR	   0x0000008000000000ULL
+#define INFINIPATH_HWE_IB_UC_MEMORYPARITYERR	   0x0000004000000000ULL
+#define INFINIPATH_HWE_PCIE_UC_OCT0MEMORYPARITYERR 0x0000001000000000ULL
+#define INFINIPATH_HWE_PCIE_UC_OCT1MEMORYPARITYERR 0x0000002000000000ULL
+
+#define IBA7220_IBCS_LINKTRAININGSTATE_MASK 0x1F
+#define IBA7220_IBCS_LINKSTATE_SHIFT 5
+#define IBA7220_IBCS_LINKSPEED_SHIFT 8
+#define IBA7220_IBCS_LINKWIDTH_SHIFT 9
+
+#define IBA7220_IBCC_LINKINITCMD_MASK 0x7ULL
+#define IBA7220_IBCC_LINKCMD_SHIFT 19
+#define IBA7220_IBCC_MAXPKTLEN_SHIFT 21
+
+/* kr_ibcddrctrl bits */
+#define IBA7220_IBC_DLIDLMC_MASK	0xFFFFFFFFUL
+#define IBA7220_IBC_DLIDLMC_SHIFT	32
+#define IBA7220_IBC_HRTBT_MASK	3
+#define IBA7220_IBC_HRTBT_SHIFT	16
+#define IBA7220_IBC_HRTBT_ENB	0x10000UL
+#define IBA7220_IBC_LANE_REV_SUPPORTED (1<<8)
+#define IBA7220_IBC_LREV_MASK	1
+#define IBA7220_IBC_LREV_SHIFT	8
+#define IBA7220_IBC_RXPOL_MASK	1
+#define IBA7220_IBC_RXPOL_SHIFT	7
+#define IBA7220_IBC_WIDTH_SHIFT	5
+#define IBA7220_IBC_WIDTH_MASK	0x3
+#define IBA7220_IBC_WIDTH_1X_ONLY	(0<<IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_WIDTH_4X_ONLY	(1<<IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_WIDTH_AUTONEG	(2<<IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_SPEED_AUTONEG	(1<<1)
+#define IBA7220_IBC_SPEED_SDR		(1<<2)
+#define IBA7220_IBC_SPEED_DDR		(1<<3)
+#define IBA7220_IBC_SPEED_AUTONEG_MASK  (0x7<<1)
+#define IBA7220_IBC_IBTA_1_2_MASK	(1)
+
+/* kr_ibcddrstatus */
+/* link latency shift is 0, don't bother defining */
+#define IBA7220_DDRSTAT_LINKLAT_MASK    0x3ffffff
+
+/* kr_extstatus bits */
+#define INFINIPATH_EXTS_FREQSEL 0x2
+#define INFINIPATH_EXTS_SERDESSEL 0x4
+#define INFINIPATH_EXTS_MEMBIST_ENDTEST     0x0000000000004000
+#define INFINIPATH_EXTS_MEMBIST_DISABLED    0x0000000000008000
+
+/* kr_xgxsconfig bits */
+#define INFINIPATH_XGXS_RESET          0x5ULL
+#define INFINIPATH_XGXS_FC_SAFE        (1ULL<<63)
+
+/* kr_rcvpktledcnt */
+#define IBA7220_LEDBLINK_ON_SHIFT 32 /* 4ns period on after packet */
+#define IBA7220_LEDBLINK_OFF_SHIFT 0 /* 4ns period off before next on */
+
+#define _IPATH_GPIO_SDA_NUM 1
+#define _IPATH_GPIO_SCL_NUM 0
+
+#define IPATH_GPIO_SDA (1ULL << \
+	(_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+#define IPATH_GPIO_SCL (1ULL << \
+	(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+
+#define IBA7220_R_INTRAVAIL_SHIFT 17
+#define IBA7220_R_TAILUPD_SHIFT 35
+#define IBA7220_R_PORTCFG_SHIFT 36
+
+#define INFINIPATH_JINT_PACKETSHIFT 16
+#define INFINIPATH_JINT_DEFAULT_IDLE_TICKS  0
+#define INFINIPATH_JINT_DEFAULT_MAX_PACKETS 0
+
+#define IBA7220_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */
+
+/*
+ * the size bits give us 2^N, in KB units.  0 marks as invalid,
+ * and 7 is reserved.  We currently use only 2KB and 4KB
+ */
+#define IBA7220_TID_SZ_SHIFT 37 /* shift to 3bit size selector */
+#define IBA7220_TID_SZ_2K (1UL<<IBA7220_TID_SZ_SHIFT) /* 2KB */
+#define IBA7220_TID_SZ_4K (2UL<<IBA7220_TID_SZ_SHIFT) /* 4KB */
+#define IBA7220_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */
+
+#define IPATH_AUTONEG_TRIES 5 /* sequential retries to negotiate DDR */
+
+static char int_type[16] = "auto";
+module_param_string(interrupt_type, int_type, sizeof(int_type), 0444);
+MODULE_PARM_DESC(int_type, " interrupt_type=auto|force_msi|force_intx\n");
+
+/* packet rate matching delay; chip has support */
+static u8 rate_to_delay[2][2] = {
+	/* 1x, 4x */
+	{   8, 2 }, /* SDR */
+	{   4, 1 }  /* DDR */
+};
+
+/* 7220 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_7220_hwerror_msgs[] = {
+	INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
+	INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
+	/*
+	 * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
+	 * parity or memory parity error failures, because most likely we
+	 * won't be able to talk to the core of the chip.  Nonetheless, we
+	 * might see them, if they are in parts of the PCIe core that aren't
+	 * essential.
+	 */
+	INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
+	INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
+	INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+	INFINIPATH_HWE_MSG(PCIECPLDATAQUEUEERR, "PCIe cpl header queue"),
+	INFINIPATH_HWE_MSG(PCIECPLHDRQUEUEERR, "PCIe cpl data queue"),
+	INFINIPATH_HWE_MSG(SDMAMEMREADERR, "Send DMA memory read"),
+	INFINIPATH_HWE_MSG(CLK_UC_PLLNOTLOCKED, "uC PLL clock not locked"),
+	INFINIPATH_HWE_MSG(PCIESERDESQ0PCLKNOTDETECT,
+		"PCIe serdes Q0 no clock"),
+	INFINIPATH_HWE_MSG(PCIESERDESQ1PCLKNOTDETECT,
+		"PCIe serdes Q1 no clock"),
+	INFINIPATH_HWE_MSG(PCIESERDESQ2PCLKNOTDETECT,
+		"PCIe serdes Q2 no clock"),
+	INFINIPATH_HWE_MSG(PCIESERDESQ3PCLKNOTDETECT,
+		"PCIe serdes Q3 no clock"),
+	INFINIPATH_HWE_MSG(DDSRXEQMEMORYPARITYERR,
+		"DDS RXEQ memory parity"),
+	INFINIPATH_HWE_MSG(IB_UC_MEMORYPARITYERR, "IB uC memory parity"),
+	INFINIPATH_HWE_MSG(PCIE_UC_OCT0MEMORYPARITYERR,
+		"PCIe uC oct0 memory parity"),
+	INFINIPATH_HWE_MSG(PCIE_UC_OCT1MEMORYPARITYERR,
+		"PCIe uC oct1 memory parity"),
+};
+
+static void autoneg_work(struct work_struct *);
+
+/*
+ * the offset is different for different configured port numbers, since
+ * port0 is fixed in size, but others can vary.   Make it a function to
+ * make the issue more obvious.
+*/
+static inline u32 port_egrtid_idx(struct ipath_devdata *dd, unsigned port)
+{
+	 return port ? dd->ipath_p0_rcvegrcnt +
+		 (port-1) * dd->ipath_rcvegrcnt : 0;
+}
+
+static void ipath_7220_txe_recover(struct ipath_devdata *dd)
+{
+	++ipath_stats.sps_txeparity;
+
+	dev_info(&dd->pcidev->dev,
+		"Recovering from TXE PIO parity error\n");
+	ipath_disarm_senderrbufs(dd, 1);
+}
+
+
+/**
+ * ipath_7220_handle_hwerrors - display hardware errors.
+ * @dd: the infinipath device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Use same msg buffer as regular errors to avoid excessive stack
+ * use.  Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue.  We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
+ */
+static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+				       size_t msgl)
+{
+	ipath_err_t hwerrs;
+	u32 bits, ctrl;
+	int isfatal = 0;
+	char bitsmsg[64];
+	int log_idx;
+
+	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+	if (!hwerrs) {
+		/*
+		 * better than printing cofusing messages
+		 * This seems to be related to clearing the crc error, or
+		 * the pll error during init.
+		 */
+		ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
+		goto bail;
+	} else if (hwerrs == ~0ULL) {
+		ipath_dev_err(dd, "Read of hardware error status failed "
+			      "(all bits set); ignoring\n");
+		goto bail;
+	}
+	ipath_stats.sps_hwerrs++;
+
+	/*
+	 * Always clear the error status register, except MEMBISTFAIL,
+	 * regardless of whether we continue or stop using the chip.
+	 * We want that set so we know it failed, even across driver reload.
+	 * We'll still ignore it in the hwerrmask.  We do this partly for
+	 * diagnostics, but also for support.
+	 */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+			 hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
+
+	hwerrs &= dd->ipath_hwerrmask;
+
+	/* We log some errors to EEPROM, check if we have any of those. */
+	for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+		if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+			ipath_inc_eeprom_err(dd, log_idx, 1);
+	/*
+	 * Make sure we get this much out, unless told to be quiet,
+	 * or it's occurred within the last 5 seconds.
+	 */
+	if ((hwerrs & ~(dd->ipath_lasthwerror |
+			((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
+	    (ipath_debug & __IPATH_VERBDBG))
+		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
+			 "(cleared)\n", (unsigned long long) hwerrs);
+	dd->ipath_lasthwerror |= hwerrs;
+
+	if (hwerrs & ~dd->ipath_hwe_bitsextant)
+		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
+			      "%llx set\n", (unsigned long long)
+			      (hwerrs & ~dd->ipath_hwe_bitsextant));
+
+	if (hwerrs & INFINIPATH_HWE_IB_UC_MEMORYPARITYERR)
+		ipath_sd7220_clr_ibpar(dd);
+
+	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+	if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
+		/*
+		 * Parity errors in send memory are recoverable,
+		 * just cancel the send (if indicated in * sendbuffererror),
+		 * count the occurrence, unfreeze (if no other handled
+		 * hardware error bits are set), and continue.
+		 */
+		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+			ipath_7220_txe_recover(dd);
+			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+			if (!hwerrs) {
+				/* else leave in freeze mode */
+				ipath_write_kreg(dd,
+						 dd->ipath_kregs->kr_control,
+						 dd->ipath_control);
+				goto bail;
+			}
+		}
+		if (hwerrs) {
+			/*
+			 * If any set that we aren't ignoring only make the
+			 * complaint once, in case it's stuck or recurring,
+			 * and we get here multiple times
+			 * Force link down, so switch knows, and
+			 * LEDs are turned off.
+			 */
+			if (dd->ipath_flags & IPATH_INITTED) {
+				ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+				ipath_setup_7220_setextled(dd,
+					INFINIPATH_IBCS_L_STATE_DOWN,
+					INFINIPATH_IBCS_LT_STATE_DISABLED);
+				ipath_dev_err(dd, "Fatal Hardware Error "
+					      "(freeze mode), no longer"
+					      " usable, SN %.16s\n",
+						  dd->ipath_serial);
+				isfatal = 1;
+			}
+			/*
+			 * Mark as having had an error for driver, and also
+			 * for /sys and status word mapped to user programs.
+			 * This marks unit as not usable, until reset.
+			 */
+			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+			*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+			dd->ipath_flags &= ~IPATH_INITTED;
+		} else {
+			ipath_dbg("Clearing freezemode on ignored hardware "
+				  "error\n");
+			ipath_clear_freeze(dd);
+		}
+	}
+
+	*msg = '\0';
+
+	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
+		strlcat(msg, "[Memory BIST test failed, "
+			"InfiniPath hardware unusable]", msgl);
+		/* ignore from now on, so disable until driver reloaded */
+		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+				 dd->ipath_hwerrmask);
+	}
+
+	ipath_format_hwerrors(hwerrs,
+			      ipath_7220_hwerror_msgs,
+			      ARRAY_SIZE(ipath_7220_hwerror_msgs),
+			      msg, msgl);
+
+	if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
+		      << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
+		bits = (u32) ((hwerrs >>
+			       INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
+			      INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
+		snprintf(bitsmsg, sizeof bitsmsg,
+			 "[PCIe Mem Parity Errs %x] ", bits);
+		strlcat(msg, bitsmsg, msgl);
+	}
+
+#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |	\
+			 INFINIPATH_HWE_COREPLL_RFSLIP)
+
+	if (hwerrs & _IPATH_PLL_FAIL) {
+		snprintf(bitsmsg, sizeof bitsmsg,
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
+			 (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
+		strlcat(msg, bitsmsg, msgl);
+		/* ignore from now on, so disable until driver reloaded */
+		dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+				 dd->ipath_hwerrmask);
+	}
+
+	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
+		/*
+		 * If it occurs, it is left masked since the eternal
+		 * interface is unused.
+		 */
+		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+				 dd->ipath_hwerrmask);
+	}
+
+	ipath_dev_err(dd, "%s hardware error\n", msg);
+	/*
+	 * For /sys status file. if no trailing } is copied, we'll
+	 * know it was truncated.
+	 */
+	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
+		snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
+			 "{%s}", msg);
+bail:;
+}
+
+/**
+ * ipath_7220_boardname - fill in the board name
+ * @dd: the infinipath device
+ * @name: the output buffer
+ * @namelen: the size of the output buffer
+ *
+ * info is based on the board revision register
+ */
+static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
+	size_t namelen)
+{
+	char *n = NULL;
+	u8 boardrev = dd->ipath_boardrev;
+	int ret;
+
+	if (boardrev == 15) {
+		/*
+		 * Emulator sometimes comes up all-ones, rather than zero.
+		 */
+		boardrev = 0;
+		dd->ipath_boardrev = boardrev;
+	}
+	switch (boardrev) {
+	case 0:
+		n = "InfiniPath_7220_Emulation";
+		break;
+	case 1:
+		n = "InfiniPath_QLE7240";
+		break;
+	case 2:
+		n = "InfiniPath_QLE7280";
+		break;
+	case 3:
+		n = "InfiniPath_QLE7242";
+		break;
+	case 4:
+		n = "InfiniPath_QEM7240";
+		break;
+	case 5:
+		n = "InfiniPath_QMI7240";
+		break;
+	case 6:
+		n = "InfiniPath_QMI7264";
+		break;
+	case 7:
+		n = "InfiniPath_QMH7240";
+		break;
+	case 8:
+		n = "InfiniPath_QME7240";
+		break;
+	case 9:
+		n = "InfiniPath_QLE7250";
+		break;
+	case 10:
+		n = "InfiniPath_QLE7290";
+		break;
+	case 11:
+		n = "InfiniPath_QEM7250";
+		break;
+	case 12:
+		n = "InfiniPath_QLE-Bringup";
+		break;
+	default:
+		ipath_dev_err(dd,
+			      "Don't yet know about board with ID %u\n",
+			      boardrev);
+		snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
+			 boardrev);
+		break;
+	}
+	if (n)
+		snprintf(name, namelen, "%s", n);
+
+	if (dd->ipath_majrev != 5 || !dd->ipath_minrev ||
+		dd->ipath_minrev > 2) {
+		ipath_dev_err(dd, "Unsupported InfiniPath hardware "
+			      "revision %u.%u!\n",
+			      dd->ipath_majrev, dd->ipath_minrev);
+		ret = 1;
+	} else if (dd->ipath_minrev == 1) {
+		/* Rev1 chips are prototype. Complain, but allow use */
+		ipath_dev_err(dd, "Unsupported hardware "
+			      "revision %u.%u, Contact support@qlogic.com\n",
+			      dd->ipath_majrev, dd->ipath_minrev);
+		ret = 0;
+	} else
+		ret = 0;
+
+	/*
+	 * Set here not in ipath_init_*_funcs because we have to do
+	 * it after we can read chip registers.
+	 */
+	dd->ipath_ureg_align = 0x10000;  /* 64KB alignment */
+
+	return ret;
+}
+
+/**
+ * ipath_7220_init_hwerrors - enable hardware errors
+ * @dd: the infinipath device
+ *
+ * now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those errors bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask
+ */
+static void ipath_7220_init_hwerrors(struct ipath_devdata *dd)
+{
+	ipath_err_t val;
+	u64 extsval;
+
+	extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+
+	if (!(extsval & (INFINIPATH_EXTS_MEMBIST_ENDTEST |
+			INFINIPATH_EXTS_MEMBIST_DISABLED)))
+		ipath_dev_err(dd, "MemBIST did not complete!\n");
+	if (extsval & INFINIPATH_EXTS_MEMBIST_DISABLED)
+		dev_info(&dd->pcidev->dev, "MemBIST is disabled.\n");
+
+	val = ~0ULL;	/* barring bugs, all hwerrors become interrupts, */
+
+	if (!dd->ipath_boardrev)	/* no PLL for Emulator */
+		val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+
+	if (dd->ipath_minrev == 1)
+		val &= ~(1ULL << 42); /* TXE LaunchFIFO Parity rev1 issue */
+
+	val &= ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR;
+	dd->ipath_hwerrmask = val;
+
+	/*
+	 * special trigger "error" is for debugging purposes. It
+	 * works around a processor/chipset problem.  The error
+	 * interrupt allows us to count occurrences, but we don't
+	 * want to pay the overhead for normal use.  Emulation only
+	 */
+	if (!dd->ipath_boardrev)
+		dd->ipath_maskederrs = INFINIPATH_E_SENDSPECIALTRIGGER;
+}
+
+/*
+ * All detailed interaction with the SerDes has been moved to ipath_sd7220.c
+ *
+ * The portion of IBA7220-specific bringup_serdes() that actually deals with
+ * registers and memory within the SerDes itself is ipath_sd7220_init().
+ */
+
+/**
+ * ipath_7220_bringup_serdes - bring up the serdes
+ * @dd: the infinipath device
+ */
+static int ipath_7220_bringup_serdes(struct ipath_devdata *dd)
+{
+	int ret = 0;
+	u64 val, prev_val, guid;
+	int was_reset;		/* Note whether uC was reset */
+
+	ipath_dbg("Trying to bringup serdes\n");
+
+	if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
+	    INFINIPATH_HWE_SERDESPLLFAILED) {
+		ipath_dbg("At start, serdes PLL failed bit set "
+			  "in hwerrstatus, clearing and continuing\n");
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+				 INFINIPATH_HWE_SERDESPLLFAILED);
+	}
+
+	if (!dd->ipath_ibcddrctrl) {
+		/* not on re-init after reset */
+		dd->ipath_ibcddrctrl =
+			ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcddrctrl);
+
+		if (dd->ipath_link_speed_enabled ==
+			(IPATH_IB_SDR | IPATH_IB_DDR))
+			dd->ipath_ibcddrctrl |=
+				IBA7220_IBC_SPEED_AUTONEG_MASK |
+				IBA7220_IBC_IBTA_1_2_MASK;
+		else
+			dd->ipath_ibcddrctrl |=
+				dd->ipath_link_speed_enabled == IPATH_IB_DDR
+				?  IBA7220_IBC_SPEED_DDR :
+				IBA7220_IBC_SPEED_SDR;
+		if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
+			IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X))
+			dd->ipath_ibcddrctrl |= IBA7220_IBC_WIDTH_AUTONEG;
+		else
+			dd->ipath_ibcddrctrl |=
+				dd->ipath_link_width_enabled == IB_WIDTH_4X
+				? IBA7220_IBC_WIDTH_4X_ONLY :
+				IBA7220_IBC_WIDTH_1X_ONLY;
+
+		/* always enable these on driver reload, not sticky */
+		dd->ipath_ibcddrctrl |=
+			IBA7220_IBC_RXPOL_MASK << IBA7220_IBC_RXPOL_SHIFT;
+		dd->ipath_ibcddrctrl |=
+			IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT;
+		/*
+		 * automatic lane reversal detection for receive
+		 * doesn't work correctly in rev 1, so disable it
+		 * on that rev, otherwise enable (disabling not
+		 * sticky across reload for >rev1)
+		 */
+		if (dd->ipath_minrev == 1)
+			dd->ipath_ibcddrctrl &=
+			~IBA7220_IBC_LANE_REV_SUPPORTED;
+		else
+			dd->ipath_ibcddrctrl |=
+				IBA7220_IBC_LANE_REV_SUPPORTED;
+	}
+
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
+			dd->ipath_ibcddrctrl);
+
+	ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), 0Ull);
+
+	/* IBA7220 has SERDES MPU reset in D0 of what _was_ IBPLLCfg */
+	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
+	/* remember if uC was in Reset or not, for dactrim */
+	was_reset = (val & 1);
+	ipath_cdbg(VERBOSE, "IBReset %s xgxsconfig %llx\n",
+		   was_reset ? "Asserted" : "Negated", (unsigned long long)
+		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+	if (dd->ipath_boardrev) {
+		/*
+		 * Hardware is not emulator, and may have been reset. Init it.
+		 * Below will release reset, but needs to know if chip was
+		 * originally in reset, to only trim DACs on first time
+		 * after chip reset or powercycle (not driver reload)
+		 */
+		ret = ipath_sd7220_init(dd, was_reset);
+	}
+
+	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+	prev_val = val;
+	val |= INFINIPATH_XGXS_FC_SAFE;
+	if (val != prev_val) {
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+		ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	}
+	if (val & INFINIPATH_XGXS_RESET)
+		val &= ~INFINIPATH_XGXS_RESET;
+	if (val != prev_val)
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+
+	ipath_cdbg(VERBOSE, "done: xgxs=%llx from %llx\n",
+		   (unsigned long long)
+		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig),
+		   prev_val);
+
+	guid = be64_to_cpu(dd->ipath_guid);
+
+	if (!guid) {
+		/* have to have something, so use likely unique tsc */
+		guid = get_cycles();
+		ipath_dbg("No GUID for heartbeat, faking %llx\n",
+			(unsigned long long)guid);
+	} else
+		ipath_cdbg(VERBOSE, "Wrote %llX to HRTBT_GUID\n", guid);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_hrtbt_guid, guid);
+	return ret;
+}
+
+static void ipath_7220_config_jint(struct ipath_devdata *dd,
+				   u16 idle_ticks, u16 max_packets)
+{
+
+	/*
+	 * We can request a receive interrupt for 1 or more packets
+	 * from current offset.
+	 */
+	if (idle_ticks == 0 || max_packets == 0)
+		/* interrupt after one packet if no mitigation */
+		dd->ipath_rhdrhead_intr_off =
+			1ULL << IBA7220_HDRHEAD_PKTINT_SHIFT;
+	else
+		/* Turn off RcvHdrHead interrupts if using mitigation */
+		dd->ipath_rhdrhead_intr_off = 0ULL;
+
+	/* refresh kernel RcvHdrHead registers... */
+	ipath_write_ureg(dd, ur_rcvhdrhead,
+			 dd->ipath_rhdrhead_intr_off |
+			 dd->ipath_pd[0]->port_head, 0);
+
+	dd->ipath_jint_max_packets = max_packets;
+	dd->ipath_jint_idle_ticks = idle_ticks;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_jintreload,
+			 ((u64) max_packets << INFINIPATH_JINT_PACKETSHIFT) |
+			 idle_ticks);
+}
+
+/**
+ * ipath_7220_quiet_serdes - set serdes to txidle
+ * @dd: the infinipath device
+ * Called when driver is being unloaded
+ */
+static void ipath_7220_quiet_serdes(struct ipath_devdata *dd)
+{
+	u64 val;
+	dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
+	wake_up(&dd->ipath_autoneg_wait);
+	cancel_delayed_work(&dd->ipath_autoneg_work);
+	flush_scheduled_work();
+	ipath_shutdown_relock_poll(dd);
+	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+	val |= INFINIPATH_XGXS_RESET;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+}
+
+static int ipath_7220_intconfig(struct ipath_devdata *dd)
+{
+	ipath_7220_config_jint(dd, dd->ipath_jint_idle_ticks,
+			       dd->ipath_jint_max_packets);
+	return 0;
+}
+
+/**
+ * ipath_setup_7220_setextled - set the state of the two external LEDs
+ * @dd: the infinipath device
+ * @lst: the L state
+ * @ltst: the LT state
+ *
+ * These LEDs indicate the physical and logical state of IB link.
+ * For this chip (at least with recommended board pinouts), LED1
+ * is Yellow (logical state) and LED2 is Green (physical state),
+ *
+ * Note:  We try to match the Mellanox HCA LED behavior as best
+ * we can.  Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate.  That's
+ * visible overhead, so not something we will do.
+ *
+ */
+static void ipath_setup_7220_setextled(struct ipath_devdata *dd, u64 lst,
+				       u64 ltst)
+{
+	u64 extctl, ledblink = 0;
+	unsigned long flags = 0;
+
+	/* the diags use the LED to indicate diag info, so we leave
+	 * the external LED alone when the diags are running */
+	if (ipath_diag_inuse)
+		return;
+
+	/* Allow override of LED display for, e.g. Locating system in rack */
+	if (dd->ipath_led_override) {
+		ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+			? INFINIPATH_IBCS_LT_STATE_LINKUP
+			: INFINIPATH_IBCS_LT_STATE_DISABLED;
+		lst = (dd->ipath_led_override & IPATH_LED_LOG)
+			? INFINIPATH_IBCS_L_STATE_ACTIVE
+			: INFINIPATH_IBCS_L_STATE_DOWN;
+	}
+
+	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+	extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
+				       INFINIPATH_EXTC_LED2PRIPORT_ON);
+	if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) {
+		extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
+		/*
+		 * counts are in chip clock (4ns) periods.
+		 * This is 1/16 sec (66.6ms) on,
+		 * 3/16 sec (187.5 ms) off, with packets rcvd
+		 */
+		ledblink = ((66600*1000UL/4) << IBA7220_LEDBLINK_ON_SHIFT)
+			| ((187500*1000UL/4) << IBA7220_LEDBLINK_OFF_SHIFT);
+	}
+	if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
+		extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
+	dd->ipath_extctrl = extctl;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
+	if (ledblink) /* blink the LED on packet receive */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvpktledcnt,
+			ledblink);
+}
+
+/*
+ * Similar to pci_intx(pdev, 1), except that we make sure
+ * msi is off...
+ */
+static void ipath_enable_intx(struct pci_dev *pdev)
+{
+	u16 cw, new;
+	int pos;
+
+	/* first, turn on INTx */
+	pci_read_config_word(pdev, PCI_COMMAND, &cw);
+	new = cw & ~PCI_COMMAND_INTX_DISABLE;
+	if (new != cw)
+		pci_write_config_word(pdev, PCI_COMMAND, new);
+
+	/* then turn off MSI */
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+	if (pos) {
+		pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
+		new = cw & ~PCI_MSI_FLAGS_ENABLE;
+		if (new != cw)
+			pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new);
+	}
+}
+
+static int ipath_msi_enabled(struct pci_dev *pdev)
+{
+	int pos, ret = 0;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+	if (pos) {
+		u16 cw;
+
+		pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
+		ret = !!(cw & PCI_MSI_FLAGS_ENABLE);
+	}
+	return ret;
+}
+
+/*
+ * disable msi interrupt if enabled, and clear the flag.
+ * flag is used primarily for the fallback to IntX, but
+ * is also used in reinit after reset as a flag.
+ */
+static void ipath_7220_nomsi(struct ipath_devdata *dd)
+{
+	dd->ipath_msi_lo = 0;
+#ifdef CONFIG_PCI_MSI
+	if (ipath_msi_enabled(dd->pcidev)) {
+		/*
+		 * free, but don't zero; later kernels require
+		 * it be freed before disable_msi, so the intx
+		 * setup has to request it again.
+		 */
+		 if (dd->ipath_irq)
+			free_irq(dd->ipath_irq, dd);
+		pci_disable_msi(dd->pcidev);
+	}
+#endif
+}
+
+/*
+ * ipath_setup_7220_cleanup - clean up any per-chip chip-specific stuff
+ * @dd: the infinipath device
+ *
+ * Nothing but msi interrupt cleanup for now.
+ *
+ * This is called during driver unload.
+ */
+static void ipath_setup_7220_cleanup(struct ipath_devdata *dd)
+{
+	ipath_7220_nomsi(dd);
+}
+
+
+static void ipath_7220_pcie_params(struct ipath_devdata *dd, u32 boardrev)
+{
+	u16 linkstat, minwidth, speed;
+	int pos;
+
+	pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
+	if (!pos) {
+		ipath_dev_err(dd, "Can't find PCI Express capability!\n");
+		goto bail;
+	}
+
+	pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
+			     &linkstat);
+	/*
+	 * speed is bits 0-4, linkwidth is bits 4-8
+	 * no defines for them in headers
+	 */
+	speed = linkstat & 0xf;
+	linkstat >>= 4;
+	linkstat &= 0x1f;
+	dd->ipath_lbus_width = linkstat;
+	switch (boardrev) {
+	case 0:
+	case 2:
+	case 10:
+	case 12:
+		minwidth = 16; /* x16 capable boards */
+		break;
+	default:
+		minwidth = 8; /* x8 capable boards */
+		break;
+	}
+
+	switch (speed) {
+	case 1:
+		dd->ipath_lbus_speed = 2500; /* Gen1, 2.5GHz */
+		break;
+	case 2:
+		dd->ipath_lbus_speed = 5000; /* Gen1, 5GHz */
+		break;
+	default: /* not defined, assume gen1 */
+		dd->ipath_lbus_speed = 2500;
+		break;
+	}
+
+	if (linkstat < minwidth)
+		ipath_dev_err(dd,
+			"PCIe width %u (x%u HCA), performance "
+			"reduced\n", linkstat, minwidth);
+	else
+		ipath_cdbg(VERBOSE, "PCIe speed %u width %u (x%u HCA)\n",
+			dd->ipath_lbus_speed, linkstat, minwidth);
+
+	if (speed != 1)
+		ipath_dev_err(dd,
+			"PCIe linkspeed %u is incorrect; "
+			"should be 1 (2500)!\n", speed);
+
+bail:
+	/* fill in string, even on errors */
+	snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
+		"PCIe,%uMHz,x%u\n",
+		dd->ipath_lbus_speed,
+		dd->ipath_lbus_width);
+	return;
+}
+
+
+/**
+ * ipath_setup_7220_config - setup PCIe config related stuff
+ * @dd: the infinipath device
+ * @pdev: the PCI device
+ *
+ * The pci_enable_msi() call will fail on systems with MSI quirks
+ * such as those with AMD8131, even if the device of interest is not
+ * attached to that device, (in the 2.6.13 - 2.6.15 kernels, at least, fixed
+ * late in 2.6.16).
+ * All that can be done is to edit the kernel source to remove the quirk
+ * check until that is fixed.
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning, so
+ * So we call enable_msi only for PCIe.  If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
+ * call back into the main init_one code.
+ * We save the msi lo and hi values, so we can restore them after
+ * chip reset (the kernel PCI infrastructure doesn't yet handle that
+ * correctly).
+ */
+static int ipath_setup_7220_config(struct ipath_devdata *dd,
+				   struct pci_dev *pdev)
+{
+	int pos, ret = -1;
+	u32 boardrev;
+
+	dd->ipath_msi_lo = 0;	/* used as a flag during reset processing */
+#ifdef CONFIG_PCI_MSI
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+	if (!strcmp(int_type, "force_msi") || !strcmp(int_type, "auto"))
+		ret = pci_enable_msi(pdev);
+	if (ret) {
+		if (!strcmp(int_type, "force_msi")) {
+			ipath_dev_err(dd, "pci_enable_msi failed: %d, "
+				      "force_msi is on, so not continuing.\n",
+				      ret);
+			return ret;
+		}
+
+		ipath_enable_intx(pdev);
+		if (!strcmp(int_type, "auto"))
+			ipath_dev_err(dd, "pci_enable_msi failed: %d, "
+				      "falling back to INTx\n", ret);
+	} else if (pos) {
+		u16 control;
+		pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
+				      &dd->ipath_msi_lo);
+		pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI,
+				      &dd->ipath_msi_hi);
+		pci_read_config_word(pdev, pos + PCI_MSI_FLAGS,
+				     &control);
+		/* now save the data (vector) info */
+		pci_read_config_word(pdev,
+				     pos + ((control & PCI_MSI_FLAGS_64BIT)
+					    ? PCI_MSI_DATA_64 :
+					    PCI_MSI_DATA_32),
+				     &dd->ipath_msi_data);
+	} else
+		ipath_dev_err(dd, "Can't find MSI capability, "
+			      "can't save MSI settings for reset\n");
+#else
+	ipath_dbg("PCI_MSI not configured, using IntX interrupts\n");
+	ipath_enable_intx(pdev);
+#endif
+
+	dd->ipath_irq = pdev->irq;
+
+	/*
+	 * We save the cachelinesize also, although it doesn't
+	 * really matter.
+	 */
+	pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE,
+			     &dd->ipath_pci_cacheline);
+
+	/*
+	 * this function called early, ipath_boardrev not set yet.  Can't
+	 * use ipath_read_kreg64() yet, too early in init, so use readq()
+	 */
+	boardrev = (readq(&dd->ipath_kregbase[dd->ipath_kregs->kr_revision])
+		 >> INFINIPATH_R_BOARDID_SHIFT) & INFINIPATH_R_BOARDID_MASK;
+
+	ipath_7220_pcie_params(dd, boardrev);
+
+	dd->ipath_flags |= IPATH_NODMA_RTAIL | IPATH_HAS_SEND_DMA |
+		IPATH_HAS_PBC_CNT | IPATH_HAS_THRESH_UPDATE;
+	dd->ipath_pioupd_thresh = 4U; /* set default update threshold */
+	return 0;
+}
+
+static void ipath_init_7220_variables(struct ipath_devdata *dd)
+{
+	/*
+	 * setup the register offsets, since they are different for each
+	 * chip
+	 */
+	dd->ipath_kregs = &ipath_7220_kregs;
+	dd->ipath_cregs = &ipath_7220_cregs;
+
+	/*
+	 * bits for selecting i2c direction and values,
+	 * used for I2C serial flash
+	 */
+	dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+	dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+	dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+	dd->ipath_gpio_scl = IPATH_GPIO_SCL;
+
+	/*
+	 * Fill in data for field-values that change in IBA7220.
+	 * We dynamically specify only the mask for LINKTRAININGSTATE
+	 * and only the shift for LINKSTATE, as they are the only ones
+	 * that change.  Also precalculate the 3 link states of interest
+	 * and the combined mask.
+	 */
+	dd->ibcs_ls_shift = IBA7220_IBCS_LINKSTATE_SHIFT;
+	dd->ibcs_lts_mask = IBA7220_IBCS_LINKTRAININGSTATE_MASK;
+	dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
+		dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
+	dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+		(INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
+	dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+		(INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
+	dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+		INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+		(INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
+
+	/*
+	 * Fill in data for ibcc field-values that change in IBA7220.
+	 * We dynamically specify only the mask for LINKINITCMD
+	 * and only the shift for LINKCMD and MAXPKTLEN, as they are
+	 * the only ones that change.
+	 */
+	dd->ibcc_lic_mask = IBA7220_IBCC_LINKINITCMD_MASK;
+	dd->ibcc_lc_shift = IBA7220_IBCC_LINKCMD_SHIFT;
+	dd->ibcc_mpl_shift = IBA7220_IBCC_MAXPKTLEN_SHIFT;
+
+	/* Fill in shifts for RcvCtrl. */
+	dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
+	dd->ipath_r_intravail_shift = IBA7220_R_INTRAVAIL_SHIFT;
+	dd->ipath_r_tailupd_shift = IBA7220_R_TAILUPD_SHIFT;
+	dd->ipath_r_portcfg_shift = IBA7220_R_PORTCFG_SHIFT;
+
+	/* variables for sanity checking interrupt and errors */
+	dd->ipath_hwe_bitsextant =
+		(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+		 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+		 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
+		(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
+		 INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
+		INFINIPATH_HWE_PCIE1PLLFAILED |
+		INFINIPATH_HWE_PCIE0PLLFAILED |
+		INFINIPATH_HWE_PCIEPOISONEDTLP |
+		INFINIPATH_HWE_PCIECPLTIMEOUT |
+		INFINIPATH_HWE_PCIEBUSPARITYXTLH |
+		INFINIPATH_HWE_PCIEBUSPARITYXADM |
+		INFINIPATH_HWE_PCIEBUSPARITYRADM |
+		INFINIPATH_HWE_MEMBISTFAILED |
+		INFINIPATH_HWE_COREPLL_FBSLIP |
+		INFINIPATH_HWE_COREPLL_RFSLIP |
+		INFINIPATH_HWE_SERDESPLLFAILED |
+		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
+		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR |
+		INFINIPATH_HWE_PCIECPLDATAQUEUEERR |
+		INFINIPATH_HWE_PCIECPLHDRQUEUEERR |
+		INFINIPATH_HWE_SDMAMEMREADERR |
+		INFINIPATH_HWE_CLK_UC_PLLNOTLOCKED |
+		INFINIPATH_HWE_PCIESERDESQ0PCLKNOTDETECT |
+		INFINIPATH_HWE_PCIESERDESQ1PCLKNOTDETECT |
+		INFINIPATH_HWE_PCIESERDESQ2PCLKNOTDETECT |
+		INFINIPATH_HWE_PCIESERDESQ3PCLKNOTDETECT |
+		INFINIPATH_HWE_DDSRXEQMEMORYPARITYERR |
+		INFINIPATH_HWE_IB_UC_MEMORYPARITYERR |
+		INFINIPATH_HWE_PCIE_UC_OCT0MEMORYPARITYERR |
+		INFINIPATH_HWE_PCIE_UC_OCT1MEMORYPARITYERR;
+	dd->ipath_i_bitsextant =
+		INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED |
+		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
+		(INFINIPATH_I_RCVAVAIL_MASK <<
+		 INFINIPATH_I_RCVAVAIL_SHIFT) |
+		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
+		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO |
+		INFINIPATH_I_JINT | INFINIPATH_I_SERDESTRIMDONE;
+	dd->ipath_e_bitsextant =
+		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
+		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
+		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
+		INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
+		INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
+		INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
+		INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+		INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
+		INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
+		INFINIPATH_E_SENDSPECIALTRIGGER |
+		INFINIPATH_E_SDMADISABLED | INFINIPATH_E_SMINPKTLEN |
+		INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNDERRUN |
+		INFINIPATH_E_SPKTLEN | INFINIPATH_E_SDROPPEDSMPPKT |
+		INFINIPATH_E_SDROPPEDDATAPKT |
+		INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
+		INFINIPATH_E_SUNSUPVL | INFINIPATH_E_SENDBUFMISUSE |
+		INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND |
+		INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE |
+		INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG |
+		INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW |
+		INFINIPATH_E_SDMAUNEXPDATA |
+		INFINIPATH_E_IBSTATUSCHANGED | INFINIPATH_E_INVALIDADDR |
+		INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE |
+		INFINIPATH_E_SDMADESCADDRMISALIGN |
+		INFINIPATH_E_INVALIDEEPCMD;
+
+	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+	dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
+	dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
+	dd->ipath_flags |= IPATH_INTREG_64 | IPATH_HAS_MULT_IB_SPEED
+		| IPATH_HAS_LINK_LATENCY;
+
+	/*
+	 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+	 * 2 is Some Misc, 3 is reserved for future.
+	 */
+	dd->ipath_eep_st_masks[0].hwerrs_to_log =
+		INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+		INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+	dd->ipath_eep_st_masks[1].hwerrs_to_log =
+		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+	dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
+
+	ipath_linkrecovery = 0;
+
+	init_waitqueue_head(&dd->ipath_autoneg_wait);
+	INIT_DELAYED_WORK(&dd->ipath_autoneg_work,  autoneg_work);
+
+	dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+	dd->ipath_link_speed_supported = IPATH_IB_SDR | IPATH_IB_DDR;
+
+	dd->ipath_link_width_enabled = dd->ipath_link_width_supported;
+	dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
+	/*
+	 * set the initial values to reasonable default, will be set
+	 * for real when link is up.
+	 */
+	dd->ipath_link_width_active = IB_WIDTH_4X;
+	dd->ipath_link_speed_active = IPATH_IB_SDR;
+	dd->delay_mult = rate_to_delay[0][1];
+}
+
+
+/*
+ * Setup the MSI stuff again after a reset.  I'd like to just call
+ * pci_enable_msi() and request_irq() again, but when I do that,
+ * the MSI enable bit doesn't get set in the command word, and
+ * we switch to to a different interrupt vector, which is confusing,
+ * so I instead just do it all inline.  Perhaps somehow can tie this
+ * into the PCIe hotplug support at some point
+ * Note, because I'm doing it all here, I don't call pci_disable_msi()
+ * or free_irq() at the start of ipath_setup_7220_reset().
+ */
+static int ipath_reinit_msi(struct ipath_devdata *dd)
+{
+	int ret = 0;
+#ifdef CONFIG_PCI_MSI
+	int pos;
+	u16 control;
+	if (!dd->ipath_msi_lo) /* Using intX, or init problem */
+		goto bail;
+
+	pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+	if (!pos) {
+		ipath_dev_err(dd, "Can't find MSI capability, "
+			      "can't restore MSI settings\n");
+		goto bail;
+	}
+	ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
+		   dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
+	pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
+			       dd->ipath_msi_lo);
+	ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
+		   dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
+	pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
+			       dd->ipath_msi_hi);
+	pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
+	if (!(control & PCI_MSI_FLAGS_ENABLE)) {
+		ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
+			   "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
+			   control, control | PCI_MSI_FLAGS_ENABLE);
+		control |= PCI_MSI_FLAGS_ENABLE;
+		pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
+				      control);
+	}
+	/* now rewrite the data (vector) info */
+	pci_write_config_word(dd->pcidev, pos +
+			      ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
+			      dd->ipath_msi_data);
+	ret = 1;
+bail:
+#endif
+	if (!ret) {
+		ipath_dbg("Using IntX, MSI disabled or not configured\n");
+		ipath_enable_intx(dd->pcidev);
+		ret = 1;
+	}
+	/*
+	 * We restore the cachelinesize also, although it doesn't really
+	 * matter.
+	 */
+	pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
+			      dd->ipath_pci_cacheline);
+	/* and now set the pci master bit again */
+	pci_set_master(dd->pcidev);
+
+	return ret;
+}
+
+/*
+ * This routine sleeps, so it can only be called from user context, not
+ * from interrupt context.  If we need interrupt context, we can split
+ * it into two routines.
+ */
+static int ipath_setup_7220_reset(struct ipath_devdata *dd)
+{
+	u64 val;
+	int i;
+	int ret;
+	u16 cmdval;
+
+	pci_read_config_word(dd->pcidev, PCI_COMMAND, &cmdval);
+
+	/* Use dev_err so it shows up in logs, etc. */
+	ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
+
+	/* keep chip from being accessed in a few places */
+	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_PRESENT);
+	val = dd->ipath_control | INFINIPATH_C_RESET;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
+	mb();
+
+	for (i = 1; i <= 5; i++) {
+		int r;
+
+		/*
+		 * Allow MBIST, etc. to complete; longer on each retry.
+		 * We sometimes get machine checks from bus timeout if no
+		 * response, so for now, make it *really* long.
+		 */
+		msleep(1000 + (1 + i) * 2000);
+		r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+					   dd->ipath_pcibar0);
+		if (r)
+			ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n", r);
+		r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+					   dd->ipath_pcibar1);
+		if (r)
+			ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n", r);
+		/* now re-enable memory access */
+		pci_write_config_word(dd->pcidev, PCI_COMMAND, cmdval);
+		r = pci_enable_device(dd->pcidev);
+		if (r)
+			ipath_dev_err(dd, "pci_enable_device failed after "
+				      "reset: %d\n", r);
+		/*
+		 * whether it fully enabled or not, mark as present,
+		 * again (but not INITTED)
+		 */
+		dd->ipath_flags |= IPATH_PRESENT;
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
+		if (val == dd->ipath_revision) {
+			ipath_cdbg(VERBOSE, "Got matching revision "
+				   "register %llx on try %d\n",
+				   (unsigned long long) val, i);
+			ret = ipath_reinit_msi(dd);
+			goto bail;
+		}
+		/* Probably getting -1 back */
+		ipath_dbg("Didn't get expected revision register, "
+			  "got %llx, try %d\n", (unsigned long long) val,
+			  i + 1);
+	}
+	ret = 0; /* failed */
+
+bail:
+	if (ret)
+		ipath_7220_pcie_params(dd, dd->ipath_boardrev);
+
+	return ret;
+}
+
+/**
+ * ipath_7220_put_tid - write a TID to the chip
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to udpate
+ * @tidtype: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void ipath_7220_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
+			     u32 type, unsigned long pa)
+{
+	if (pa != dd->ipath_tidinvalid) {
+		u64 chippa = pa >> IBA7220_TID_PA_SHIFT;
+
+		/* paranoia checks */
+		if (pa != (chippa << IBA7220_TID_PA_SHIFT)) {
+			dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+				 "not 2KB aligned!\n", pa);
+			return;
+		}
+		if (pa >= (1UL << IBA7220_TID_SZ_SHIFT)) {
+			ipath_dev_err(dd,
+				      "BUG: Physical page address 0x%lx "
+				      "larger than supported\n", pa);
+			return;
+		}
+
+		if (type == RCVHQ_RCV_TYPE_EAGER)
+			chippa |= dd->ipath_tidtemplate;
+		else /* for now, always full 4KB page */
+			chippa |= IBA7220_TID_SZ_4K;
+		writeq(chippa, tidptr);
+	} else
+		writeq(pa, tidptr);
+	mmiowb();
+}
+
+/**
+ * ipath_7220_clear_tid - clear all TID entries for a port, expected and eager
+ * @dd: the infinipath device
+ * @port: the port
+ *
+ * clear all TID entries for a port, expected and eager.
+ * Used from ipath_close().  On this chip, TIDs are only 32 bits,
+ * not 64, but they are still on 64 bit boundaries, so tidbase
+ * is declared as u64 * for the pointer math, even though we write 32 bits
+ */
+static void ipath_7220_clear_tids(struct ipath_devdata *dd, unsigned port)
+{
+	u64 __iomem *tidbase;
+	unsigned long tidinv;
+	int i;
+
+	if (!dd->ipath_kregbase)
+		return;
+
+	ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
+
+	tidinv = dd->ipath_tidinvalid;
+	tidbase = (u64 __iomem *)
+		((char __iomem *)(dd->ipath_kregbase) +
+		 dd->ipath_rcvtidbase +
+		 port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
+
+	for (i = 0; i < dd->ipath_rcvtidcnt; i++)
+		ipath_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+				   tidinv);
+
+	tidbase = (u64 __iomem *)
+		((char __iomem *)(dd->ipath_kregbase) +
+		 dd->ipath_rcvegrbase + port_egrtid_idx(dd, port)
+		 * sizeof(*tidbase));
+
+	for (i = port ? dd->ipath_rcvegrcnt : dd->ipath_p0_rcvegrcnt; i; i--)
+		ipath_7220_put_tid(dd, &tidbase[i-1], RCVHQ_RCV_TYPE_EAGER,
+			tidinv);
+}
+
+/**
+ * ipath_7220_tidtemplate - setup constants for TID updates
+ * @dd: the infinipath device
+ *
+ * We setup stuff that we use a lot, to avoid calculating each time
+ */
+static void ipath_7220_tidtemplate(struct ipath_devdata *dd)
+{
+	/* For now, we always allocate 4KB buffers (at init) so we can
+	 * receive max size packets.  We may want a module parameter to
+	 * specify 2KB or 4KB and/or make be per port instead of per device
+	 * for those who want to reduce memory footprint.  Note that the
+	 * ipath_rcvhdrentsize size must be large enough to hold the largest
+	 * IB header (currently 96 bytes) that we expect to handle (plus of
+	 * course the 2 dwords of RHF).
+	 */
+	if (dd->ipath_rcvegrbufsize == 2048)
+		dd->ipath_tidtemplate = IBA7220_TID_SZ_2K;
+	else if (dd->ipath_rcvegrbufsize == 4096)
+		dd->ipath_tidtemplate = IBA7220_TID_SZ_4K;
+	else {
+		dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
+			 "%u, using %u\n", dd->ipath_rcvegrbufsize,
+			 4096);
+		dd->ipath_tidtemplate = IBA7220_TID_SZ_4K;
+	}
+	dd->ipath_tidinvalid = 0;
+}
+
+static int ipath_7220_early_init(struct ipath_devdata *dd)
+{
+	u32 i, s;
+
+	if (strcmp(int_type, "auto") &&
+	    strcmp(int_type, "force_msi") &&
+	    strcmp(int_type, "force_intx")) {
+		ipath_dev_err(dd, "Invalid interrupt_type: '%s', expecting "
+			      "auto, force_msi or force_intx\n", int_type);
+		return -EINVAL;
+	}
+
+	/*
+	 * Control[4] has been added to change the arbitration within
+	 * the SDMA engine between favoring data fetches over descriptor
+	 * fetches.  ipath_sdma_fetch_arb==0 gives data fetches priority.
+	 */
+	if (ipath_sdma_fetch_arb && (dd->ipath_minrev > 1))
+		dd->ipath_control |= 1<<4;
+
+	dd->ipath_flags |= IPATH_4BYTE_TID;
+
+	/*
+	 * For openfabrics, we need to be able to handle an IB header of
+	 * 24 dwords.  HT chip has arbitrary sized receive buffers, so we
+	 * made them the same size as the PIO buffers.  This chip does not
+	 * handle arbitrary size buffers, so we need the header large enough
+	 * to handle largest IB header, but still have room for a 2KB MTU
+	 * standard IB packet.
+	 */
+	dd->ipath_rcvhdrentsize = 24;
+	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
+	dd->ipath_rhf_offset =
+		dd->ipath_rcvhdrentsize - sizeof(u64) / sizeof(u32);
+
+	dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048;
+	/*
+	 * the min() check here is currently a nop, but it may not always
+	 * be, depending on just how we do ipath_rcvegrbufsize
+	 */
+	dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k :
+				 dd->ipath_piosize2k,
+				 dd->ipath_rcvegrbufsize +
+				 (dd->ipath_rcvhdrentsize << 2));
+	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
+
+	ipath_7220_config_jint(dd, INFINIPATH_JINT_DEFAULT_IDLE_TICKS,
+			       INFINIPATH_JINT_DEFAULT_MAX_PACKETS);
+
+	if (dd->ipath_boardrev) /* no eeprom on emulator */
+		ipath_get_eeprom_info(dd);
+
+	/* start of code to check and print procmon */
+	s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
+	s &= ~(1U<<31); /* clear done bit */
+	s |= 1U<<14; /* clear counter (write 1 to clear) */
+	ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
+	/* make sure clear_counter low long enough before start */
+	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+
+	s &= ~(1U<<14); /* allow counter to count (before starting) */
+	ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
+	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
+
+	s |= 1U<<15; /* start the counter */
+	s &= ~(1U<<31); /* clear done bit */
+	s &= ~0x7ffU; /* clear frequency bits */
+	s |= 0xe29; /* set frequency bits, in case cleared */
+	ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
+
+	s = 0;
+	for (i = 500; i > 0 && !(s&(1ULL<<31)); i--) {
+		ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+		s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
+	}
+	if (!(s&(1U<<31)))
+		ipath_dev_err(dd, "ProcMon register not valid: 0x%x\n", s);
+	else
+		ipath_dbg("ProcMon=0x%x, count=0x%x\n", s, (s>>16)&0x1ff);
+
+	return 0;
+}
+
+/**
+ * ipath_init_7220_get_base_info - set chip-specific flags for user code
+ * @pd: the infinipath port
+ * @kbase: ipath_base_info pointer
+ *
+ * We set the PCIE flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithims.
+ */
+static int ipath_7220_get_base_info(struct ipath_portdata *pd, void *kbase)
+{
+	struct ipath_base_info *kinfo = kbase;
+
+	kinfo->spi_runtime_flags |=
+		IPATH_RUNTIME_PCIE | IPATH_RUNTIME_NODMA_RTAIL |
+		IPATH_RUNTIME_SDMA;
+
+	return 0;
+}
+
+static void ipath_7220_free_irq(struct ipath_devdata *dd)
+{
+	free_irq(dd->ipath_irq, dd);
+	dd->ipath_irq = 0;
+}
+
+static struct ipath_message_header *
+ipath_7220_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
+{
+	u32 offset = ipath_hdrget_offset(rhf_addr);
+
+	return (struct ipath_message_header *)
+		(rhf_addr - dd->ipath_rhf_offset + offset);
+}
+
+static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
+{
+	u32 nchipports;
+
+	nchipports = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
+	if (!cfgports) {
+		int ncpus = num_online_cpus();
+
+		if (ncpus <= 4)
+			dd->ipath_portcnt = 5;
+		else if (ncpus <= 8)
+			dd->ipath_portcnt = 9;
+		if (dd->ipath_portcnt)
+			ipath_dbg("Auto-configured for %u ports, %d cpus "
+				"online\n", dd->ipath_portcnt, ncpus);
+	} else if (cfgports <= nchipports)
+		dd->ipath_portcnt = cfgports;
+	if (!dd->ipath_portcnt) /* none of the above, set to max */
+		dd->ipath_portcnt = nchipports;
+	/*
+	 * chip can be configured for 5, 9, or 17 ports, and choice
+	 * affects number of eager TIDs per port (1K, 2K, 4K).
+	 */
+	if (dd->ipath_portcnt > 9)
+		dd->ipath_rcvctrl |= 2ULL << IBA7220_R_PORTCFG_SHIFT;
+	else if (dd->ipath_portcnt > 5)
+		dd->ipath_rcvctrl |= 1ULL << IBA7220_R_PORTCFG_SHIFT;
+	/* else configure for default 5 receive ports */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+			 dd->ipath_rcvctrl);
+	dd->ipath_p0_rcvegrcnt = 2048; /* always */
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		dd->ipath_pioreserved = 1; /* reserve a buffer */
+}
+
+
+static int ipath_7220_get_ib_cfg(struct ipath_devdata *dd, int which)
+{
+	int lsb, ret = 0;
+	u64 maskr; /* right-justified mask */
+
+	switch (which) {
+	case IPATH_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */
+		lsb = IBA7220_IBC_HRTBT_SHIFT;
+		maskr = IBA7220_IBC_HRTBT_MASK;
+		break;
+
+	case IPATH_IB_CFG_LWID_ENB: /* Get allowed Link-width */
+		ret = dd->ipath_link_width_enabled;
+		goto done;
+
+	case IPATH_IB_CFG_LWID: /* Get currently active Link-width */
+		ret = dd->ipath_link_width_active;
+		goto done;
+
+	case IPATH_IB_CFG_SPD_ENB: /* Get allowed Link speeds */
+		ret = dd->ipath_link_speed_enabled;
+		goto done;
+
+	case IPATH_IB_CFG_SPD: /* Get current Link spd */
+		ret = dd->ipath_link_speed_active;
+		goto done;
+
+	case IPATH_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */
+		lsb = IBA7220_IBC_RXPOL_SHIFT;
+		maskr = IBA7220_IBC_RXPOL_MASK;
+		break;
+
+	case IPATH_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */
+		lsb = IBA7220_IBC_LREV_SHIFT;
+		maskr = IBA7220_IBC_LREV_MASK;
+		break;
+
+	case IPATH_IB_CFG_LINKLATENCY:
+		ret = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcddrstatus)
+			& IBA7220_DDRSTAT_LINKLAT_MASK;
+		goto done;
+
+	default:
+		ret = -ENOTSUPP;
+		goto done;
+	}
+	ret = (int)((dd->ipath_ibcddrctrl >> lsb) & maskr);
+done:
+	return ret;
+}
+
+static int ipath_7220_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
+{
+	int lsb, ret = 0, setforce = 0;
+	u64 maskr; /* right-justified mask */
+
+	switch (which) {
+	case IPATH_IB_CFG_LIDLMC:
+		/*
+		 * Set LID and LMC. Combined to avoid possible hazard
+		 * caller puts LMC in 16MSbits, DLID in 16LSbits of val
+		 */
+		lsb = IBA7220_IBC_DLIDLMC_SHIFT;
+		maskr = IBA7220_IBC_DLIDLMC_MASK;
+		break;
+
+	case IPATH_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */
+		if (val & IPATH_IB_HRTBT_ON &&
+			(dd->ipath_flags & IPATH_NO_HRTBT))
+			goto bail;
+		lsb = IBA7220_IBC_HRTBT_SHIFT;
+		maskr = IBA7220_IBC_HRTBT_MASK;
+		break;
+
+	case IPATH_IB_CFG_LWID_ENB: /* set allowed Link-width */
+		/*
+		 * As with speed, only write the actual register if
+		 * the link is currently down, otherwise takes effect
+		 * on next link change.
+		 */
+		dd->ipath_link_width_enabled = val;
+		if ((dd->ipath_flags & (IPATH_LINKDOWN|IPATH_LINKINIT)) !=
+			IPATH_LINKDOWN)
+			goto bail;
+		/*
+		 * We set the IPATH_IB_FORCE_NOTIFY bit so updown
+		 * will get called because we want update
+		 * link_width_active, and the change may not take
+		 * effect for some time (if we are in POLL), so this
+		 * flag will force the updown routine to be called
+		 * on the next ibstatuschange down interrupt, even
+		 * if it's not an down->up transition.
+		 */
+		val--; /* convert from IB to chip */
+		maskr = IBA7220_IBC_WIDTH_MASK;
+		lsb = IBA7220_IBC_WIDTH_SHIFT;
+		setforce = 1;
+		dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY;
+		break;
+
+	case IPATH_IB_CFG_SPD_ENB: /* set allowed Link speeds */
+		/*
+		 * If we turn off IB1.2, need to preset SerDes defaults,
+		 * but not right now. Set a flag for the next time
+		 * we command the link down.  As with width, only write the
+		 * actual register if the link is currently down, otherwise
+		 * takes effect on next link change.  Since setting is being
+		 * explictly requested (via MAD or sysfs), clear autoneg
+		 * failure status if speed autoneg is enabled.
+		 */
+		dd->ipath_link_speed_enabled = val;
+		if (dd->ipath_ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK &&
+		    !(val & (val - 1)))
+			dd->ipath_presets_needed = 1;
+		if ((dd->ipath_flags & (IPATH_LINKDOWN|IPATH_LINKINIT)) !=
+			IPATH_LINKDOWN)
+			goto bail;
+		/*
+		 * We set the IPATH_IB_FORCE_NOTIFY bit so updown
+		 * will get called because we want update
+		 * link_speed_active, and the change may not take
+		 * effect for some time (if we are in POLL), so this
+		 * flag will force the updown routine to be called
+		 * on the next ibstatuschange down interrupt, even
+		 * if it's not an down->up transition.  When setting
+		 * speed autoneg, clear AUTONEG_FAILED.
+		 */
+		if (val == (IPATH_IB_SDR | IPATH_IB_DDR)) {
+			val = IBA7220_IBC_SPEED_AUTONEG_MASK |
+				IBA7220_IBC_IBTA_1_2_MASK;
+			dd->ipath_flags &= ~IPATH_IB_AUTONEG_FAILED;
+		} else
+			val = val == IPATH_IB_DDR ?  IBA7220_IBC_SPEED_DDR
+				: IBA7220_IBC_SPEED_SDR;
+		maskr = IBA7220_IBC_SPEED_AUTONEG_MASK |
+			IBA7220_IBC_IBTA_1_2_MASK;
+		lsb = 0; /* speed bits are low bits */
+		setforce = 1;
+		break;
+
+	case IPATH_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */
+		lsb = IBA7220_IBC_RXPOL_SHIFT;
+		maskr = IBA7220_IBC_RXPOL_MASK;
+		break;
+
+	case IPATH_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */
+		lsb = IBA7220_IBC_LREV_SHIFT;
+		maskr = IBA7220_IBC_LREV_MASK;
+		break;
+
+	default:
+		ret = -ENOTSUPP;
+		goto bail;
+	}
+	dd->ipath_ibcddrctrl &= ~(maskr << lsb);
+	dd->ipath_ibcddrctrl |= (((u64) val & maskr) << lsb);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
+			 dd->ipath_ibcddrctrl);
+	if (setforce)
+		dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY;
+bail:
+	return ret;
+}
+
+static void ipath_7220_read_counters(struct ipath_devdata *dd,
+				     struct infinipath_counters *cntrs)
+{
+	u64 *counters = (u64 *) cntrs;
+	int i;
+
+	for (i = 0; i < sizeof(*cntrs) / sizeof(u64); i++)
+		counters[i] = ipath_snap_cntr(dd, i);
+}
+
+/* if we are using MSI, try to fallback to IntX */
+static int ipath_7220_intr_fallback(struct ipath_devdata *dd)
+{
+	if (dd->ipath_msi_lo) {
+		dev_info(&dd->pcidev->dev, "MSI interrupt not detected,"
+			" trying IntX interrupts\n");
+		ipath_7220_nomsi(dd);
+		ipath_enable_intx(dd->pcidev);
+		/*
+		 * some newer kernels require free_irq before disable_msi,
+		 * and irq can be changed during disable and intx enable
+		 * and we need to therefore use the pcidev->irq value,
+		 * not our saved MSI value.
+		 */
+		dd->ipath_irq = dd->pcidev->irq;
+		if (request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
+			IPATH_DRV_NAME, dd))
+			ipath_dev_err(dd,
+				"Could not re-request_irq for IntX\n");
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * reset the XGXS (between serdes and IBC).  Slightly less intrusive
+ * than resetting the IBC or external link state, and useful in some
+ * cases to cause some retraining.  To do this right, we reset IBC
+ * as well.
+ */
+static void ipath_7220_xgxs_reset(struct ipath_devdata *dd)
+{
+	u64 val, prev_val;
+
+	prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+	val = prev_val | INFINIPATH_XGXS_RESET;
+	prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+			 dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+			 dd->ipath_control);
+}
+
+
+/* Still needs cleanup, too much hardwired stuff */
+static void autoneg_send(struct ipath_devdata *dd,
+	u32 *hdr, u32 dcnt, u32 *data)
+{
+	int i;
+	u64 cnt;
+	u32 __iomem *piobuf;
+	u32 pnum;
+
+	i = 0;
+	cnt = 7 + dcnt + 1; /* 7 dword header, dword data, icrc */
+	while (!(piobuf = ipath_getpiobuf(dd, cnt, &pnum))) {
+		if (i++ > 15) {
+			ipath_dbg("Couldn't get pio buffer for send\n");
+			return;
+		}
+		udelay(2);
+	}
+	if (dd->ipath_flags&IPATH_HAS_PBC_CNT)
+		cnt |= 0x80000000UL<<32; /* mark as VL15 */
+	writeq(cnt, piobuf);
+	ipath_flush_wc();
+	__iowrite32_copy(piobuf + 2, hdr, 7);
+	__iowrite32_copy(piobuf + 9, data, dcnt);
+	ipath_flush_wc();
+}
+
+/*
+ * _start packet gets sent twice at start, _done gets sent twice at end
+ */
+static void ipath_autoneg_send(struct ipath_devdata *dd, int which)
+{
+	static u32 swapped;
+	u32 dw, i, hcnt, dcnt, *data;
+	static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba };
+	static u32 madpayload_start[0x40] = {
+		0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
+		0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+		0x1, 0x1388, 0x15e, 0x1, /* rest 0's */
+		};
+	static u32 madpayload_done[0x40] = {
+		0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
+		0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+		0x40000001, 0x1388, 0x15e, /* rest 0's */
+		};
+	dcnt = sizeof(madpayload_start)/sizeof(madpayload_start[0]);
+	hcnt = sizeof(hdr)/sizeof(hdr[0]);
+	if (!swapped) {
+		/* for maintainability, do it at runtime */
+		for (i = 0; i < hcnt; i++) {
+			dw = (__force u32) cpu_to_be32(hdr[i]);
+			hdr[i] = dw;
+		}
+		for (i = 0; i < dcnt; i++) {
+			dw = (__force u32) cpu_to_be32(madpayload_start[i]);
+			madpayload_start[i] = dw;
+			dw = (__force u32) cpu_to_be32(madpayload_done[i]);
+			madpayload_done[i] = dw;
+		}
+		swapped = 1;
+	}
+
+	data = which ? madpayload_done : madpayload_start;
+	ipath_cdbg(PKT, "Sending %s special MADs\n", which?"done":"start");
+
+	autoneg_send(dd, hdr, dcnt, data);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	udelay(2);
+	autoneg_send(dd, hdr, dcnt, data);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	udelay(2);
+}
+
+
+
+/*
+ * Do the absolute minimum to cause an IB speed change, and make it
+ * ready, but don't actually trigger the change.   The caller will
+ * do that when ready (if link is in Polling training state, it will
+ * happen immediately, otherwise when link next goes down)
+ *
+ * This routine should only be used as part of the DDR autonegotation
+ * code for devices that are not compliant with IB 1.2 (or code that
+ * fixes things up for same).
+ *
+ * When link has gone down, and autoneg enabled, or autoneg has
+ * failed and we give up until next time we set both speeds, and
+ * then we want IBTA enabled as well as "use max enabled speed.
+ */
+static void set_speed_fast(struct ipath_devdata *dd, u32 speed)
+{
+	dd->ipath_ibcddrctrl &= ~(IBA7220_IBC_SPEED_AUTONEG_MASK |
+		IBA7220_IBC_IBTA_1_2_MASK |
+		(IBA7220_IBC_WIDTH_MASK << IBA7220_IBC_WIDTH_SHIFT));
+
+	if (speed == (IPATH_IB_SDR | IPATH_IB_DDR))
+		dd->ipath_ibcddrctrl |= IBA7220_IBC_SPEED_AUTONEG_MASK |
+			IBA7220_IBC_IBTA_1_2_MASK;
+	else
+		dd->ipath_ibcddrctrl |= speed == IPATH_IB_DDR ?
+			IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR;
+
+	/*
+	 * Convert from IB-style 1 = 1x, 2 = 4x, 3 = auto
+	 * to chip-centric       0 = 1x, 1 = 4x, 2 = auto
+	 */
+	dd->ipath_ibcddrctrl |= (u64)(dd->ipath_link_width_enabled - 1) <<
+		IBA7220_IBC_WIDTH_SHIFT;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
+			dd->ipath_ibcddrctrl);
+	ipath_cdbg(VERBOSE, "setup for IB speed (%x) done\n", speed);
+}
+
+
+/*
+ * this routine is only used when we are not talking to another
+ * IB 1.2-compliant device that we think can do DDR.
+ * (This includes all existing switch chips as of Oct 2007.)
+ * 1.2-compliant devices go directly to DDR prior to reaching INIT
+ */
+static void try_auto_neg(struct ipath_devdata *dd)
+{
+	/*
+	 * required for older non-IB1.2 DDR switches.  Newer
+	 * non-IB-compliant switches don't need it, but so far,
+	 * aren't bothered by it either.  "Magic constant"
+	 */
+	ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl),
+		0x3b9dc07);
+	dd->ipath_flags |= IPATH_IB_AUTONEG_INPROG;
+	ipath_autoneg_send(dd, 0);
+	set_speed_fast(dd, IPATH_IB_DDR);
+	ipath_toggle_rclkrls(dd);
+	/* 2 msec is minimum length of a poll cycle */
+	schedule_delayed_work(&dd->ipath_autoneg_work,
+		msecs_to_jiffies(2));
+}
+
+
+static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
+{
+	int ret = 0;
+	u32 ltstate = ipath_ib_linkstate(dd, ibcs);
+
+	dd->ipath_link_width_active =
+		((ibcs >> IBA7220_IBCS_LINKWIDTH_SHIFT) & 1) ?
+		    IB_WIDTH_4X : IB_WIDTH_1X;
+	dd->ipath_link_speed_active =
+		((ibcs >> IBA7220_IBCS_LINKSPEED_SHIFT) & 1) ?
+		    IPATH_IB_DDR : IPATH_IB_SDR;
+
+	if (!ibup) {
+		/*
+		 * when link goes down we don't want aeq running, so it
+		 * won't't interfere with IBC training, etc., and we need
+		 * to go back to the static SerDes preset values
+		 */
+		if (dd->ipath_x1_fix_tries &&
+			 ltstate <= INFINIPATH_IBCS_LT_STATE_SLEEPQUIET &&
+			ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP)
+			dd->ipath_x1_fix_tries = 0;
+		if (!(dd->ipath_flags & (IPATH_IB_AUTONEG_FAILED |
+			IPATH_IB_AUTONEG_INPROG)))
+			set_speed_fast(dd, dd->ipath_link_speed_enabled);
+		if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) {
+			ipath_cdbg(VERBOSE, "Setting RXEQ defaults\n");
+			ipath_sd7220_presets(dd);
+		}
+		/* this might better in ipath_sd7220_presets() */
+		ipath_set_relock_poll(dd, ibup);
+	} else {
+		if (ipath_compat_ddr_negotiate &&
+		    !(dd->ipath_flags & (IPATH_IB_AUTONEG_FAILED |
+			IPATH_IB_AUTONEG_INPROG)) &&
+			dd->ipath_link_speed_active == IPATH_IB_SDR &&
+			(dd->ipath_link_speed_enabled &
+			    (IPATH_IB_DDR | IPATH_IB_SDR)) ==
+			    (IPATH_IB_DDR | IPATH_IB_SDR) &&
+			dd->ipath_autoneg_tries < IPATH_AUTONEG_TRIES) {
+			/* we are SDR, and DDR auto-negotiation enabled */
+			++dd->ipath_autoneg_tries;
+			ipath_dbg("DDR negotiation try, %u/%u\n",
+				dd->ipath_autoneg_tries,
+				IPATH_AUTONEG_TRIES);
+			try_auto_neg(dd);
+			ret = 1; /* no other IB status change processing */
+		} else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
+			&& dd->ipath_link_speed_active == IPATH_IB_SDR) {
+			ipath_autoneg_send(dd, 1);
+			set_speed_fast(dd, IPATH_IB_DDR);
+			udelay(2);
+			ipath_toggle_rclkrls(dd);
+			ret = 1; /* no other IB status change processing */
+		} else {
+			if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
+				(dd->ipath_link_speed_active & IPATH_IB_DDR)) {
+				ipath_dbg("Got to INIT with DDR autoneg\n");
+				dd->ipath_flags &= ~(IPATH_IB_AUTONEG_INPROG
+					| IPATH_IB_AUTONEG_FAILED);
+				dd->ipath_autoneg_tries = 0;
+				/* re-enable SDR, for next link down */
+				set_speed_fast(dd,
+					dd->ipath_link_speed_enabled);
+				wake_up(&dd->ipath_autoneg_wait);
+			} else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) {
+				/*
+				 * clear autoneg failure flag, and do setup
+				 * so we'll try next time link goes down and
+				 * back to INIT (possibly connected to different
+				 * device).
+				 */
+				ipath_dbg("INIT %sDR after autoneg failure\n",
+					(dd->ipath_link_speed_active &
+					  IPATH_IB_DDR) ? "D" : "S");
+				dd->ipath_flags &= ~IPATH_IB_AUTONEG_FAILED;
+				dd->ipath_ibcddrctrl |=
+					IBA7220_IBC_IBTA_1_2_MASK;
+				ipath_write_kreg(dd,
+					IPATH_KREG_OFFSET(IBNCModeCtrl), 0);
+			}
+		}
+		/*
+		 * if we are in 1X, and are in autoneg width, it
+		 * could be due to an xgxs problem, so if we haven't
+		 * already tried, try twice to get to 4X; if we
+		 * tried, and couldn't, report it, since it will
+		 * probably not be what is desired.
+		 */
+		if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
+			IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X)
+			&& dd->ipath_link_width_active == IB_WIDTH_1X
+			&& dd->ipath_x1_fix_tries < 3) {
+			if (++dd->ipath_x1_fix_tries == 3)
+				dev_info(&dd->pcidev->dev,
+					"IB link is in 1X mode\n");
+			else {
+				ipath_cdbg(VERBOSE, "IB 1X in "
+					"auto-width, try %u to be "
+					"sure it's really 1X; "
+					"ltstate %u\n",
+					 dd->ipath_x1_fix_tries,
+					 ltstate);
+				dd->ipath_f_xgxs_reset(dd);
+				ret = 1; /* skip other processing */
+			}
+		}
+
+		if (!ret) {
+			dd->delay_mult = rate_to_delay
+			    [(ibcs >> IBA7220_IBCS_LINKSPEED_SHIFT) & 1]
+			    [(ibcs >> IBA7220_IBCS_LINKWIDTH_SHIFT) & 1];
+
+			ipath_set_relock_poll(dd, ibup);
+		}
+	}
+
+	if (!ret)
+		ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs),
+			ltstate);
+	return ret;
+}
+
+
+/*
+ * Handle the empirically determined mechanism for auto-negotiation
+ * of DDR speed with switches.
+ */
+static void autoneg_work(struct work_struct *work)
+{
+	struct ipath_devdata *dd;
+	u64 startms;
+	u32 lastlts, i;
+
+	dd = container_of(work, struct ipath_devdata,
+		ipath_autoneg_work.work);
+
+	startms = jiffies_to_msecs(jiffies);
+
+	/*
+	 * busy wait for this first part, it should be at most a
+	 * few hundred usec, since we scheduled ourselves for 2msec.
+	 */
+	for (i = 0; i < 25; i++) {
+		lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
+		if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
+			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN_DISABLE);
+			break;
+		}
+		udelay(100);
+	}
+
+	if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG))
+		goto done; /* we got there early or told to stop */
+
+	/* we expect this to timeout */
+	if (wait_event_timeout(dd->ipath_autoneg_wait,
+		!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
+		msecs_to_jiffies(90)))
+		goto done;
+
+	ipath_toggle_rclkrls(dd);
+
+	/* we expect this to timeout */
+	if (wait_event_timeout(dd->ipath_autoneg_wait,
+		!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
+		msecs_to_jiffies(1700)))
+		goto done;
+
+	set_speed_fast(dd, IPATH_IB_SDR);
+	ipath_toggle_rclkrls(dd);
+
+	/*
+	 * wait up to 250 msec for link to train and get to INIT at DDR;
+	 * this should terminate early
+	 */
+	wait_event_timeout(dd->ipath_autoneg_wait,
+		!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
+		msecs_to_jiffies(250));
+done:
+	if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
+		ipath_dbg("Did not get to DDR INIT (%x) after %Lu msecs\n",
+			ipath_ib_state(dd, dd->ipath_lastibcstat),
+			jiffies_to_msecs(jiffies)-startms);
+		dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
+		if (dd->ipath_autoneg_tries == IPATH_AUTONEG_TRIES) {
+			dd->ipath_flags |= IPATH_IB_AUTONEG_FAILED;
+			ipath_dbg("Giving up on DDR until next IB "
+				"link Down\n");
+			dd->ipath_autoneg_tries = 0;
+		}
+		set_speed_fast(dd, dd->ipath_link_speed_enabled);
+	}
+}
+
+
+/**
+ * ipath_init_iba7220_funcs - set up the chip-specific function pointers
+ * @dd: the infinipath device
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ */
+void ipath_init_iba7220_funcs(struct ipath_devdata *dd)
+{
+	dd->ipath_f_intrsetup = ipath_7220_intconfig;
+	dd->ipath_f_bus = ipath_setup_7220_config;
+	dd->ipath_f_reset = ipath_setup_7220_reset;
+	dd->ipath_f_get_boardname = ipath_7220_boardname;
+	dd->ipath_f_init_hwerrors = ipath_7220_init_hwerrors;
+	dd->ipath_f_early_init = ipath_7220_early_init;
+	dd->ipath_f_handle_hwerrors = ipath_7220_handle_hwerrors;
+	dd->ipath_f_quiet_serdes = ipath_7220_quiet_serdes;
+	dd->ipath_f_bringup_serdes = ipath_7220_bringup_serdes;
+	dd->ipath_f_clear_tids = ipath_7220_clear_tids;
+	dd->ipath_f_put_tid = ipath_7220_put_tid;
+	dd->ipath_f_cleanup = ipath_setup_7220_cleanup;
+	dd->ipath_f_setextled = ipath_setup_7220_setextled;
+	dd->ipath_f_get_base_info = ipath_7220_get_base_info;
+	dd->ipath_f_free_irq = ipath_7220_free_irq;
+	dd->ipath_f_tidtemplate = ipath_7220_tidtemplate;
+	dd->ipath_f_intr_fallback = ipath_7220_intr_fallback;
+	dd->ipath_f_xgxs_reset = ipath_7220_xgxs_reset;
+	dd->ipath_f_get_ib_cfg = ipath_7220_get_ib_cfg;
+	dd->ipath_f_set_ib_cfg = ipath_7220_set_ib_cfg;
+	dd->ipath_f_config_jint = ipath_7220_config_jint;
+	dd->ipath_f_config_ports = ipath_7220_config_ports;
+	dd->ipath_f_read_counters = ipath_7220_read_counters;
+	dd->ipath_f_get_msgheader = ipath_7220_get_msgheader;
+	dd->ipath_f_ib_updown = ipath_7220_ib_updown;
+
+	/* initialize chip-specific variables */
+	ipath_init_7220_variables(dd);
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 4471674..27dd894 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -155,24 +155,13 @@
 			 dd->ipath_control);
 
 	/*
-	 * Note that prior to try 14 or 15 of IB, the credit scaling
-	 * wasn't working, because it was swapped for writes with the
-	 * 1 bit default linkstate field
+	 * set initial max size pkt IBC will send, including ICRC; it's the
+	 * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
 	 */
+	val = (dd->ipath_ibmaxlen >> 2) + 1;
+	ibc = val << dd->ibcc_mpl_shift;
 
-	/* ignore pbc and align word */
-	val = dd->ipath_piosize2k - 2 * sizeof(u32);
-	/*
-	 * for ICRC, which we only send in diag test pkt mode, and we
-	 * don't need to worry about that for mtu
-	 */
-	val += 1;
-	/*
-	 * Set the IBC maxpktlength to the size of our pio buffers the
-	 * maxpktlength is in words.  This is *not* the IB data MTU.
-	 */
-	ibc = (val / sizeof(u32)) << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-	/* in KB */
+	/* flowcontrolwatermark is in units of KBytes */
 	ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
 	/*
 	 * How often flowctrl sent.  More or less in usecs; balance against
@@ -191,10 +180,13 @@
 	/*
 	 * Want to start out with both LINKCMD and LINKINITCMD in NOP
 	 * (0 and 0).  Don't put linkinitcmd in ipath_ibcctrl, want that
-	 * to stay a NOP
+	 * to stay a NOP. Flag that we are disabled, for the (unlikely)
+	 * case that some recovery path is trying to bring the link up
+	 * before we are ready.
 	 */
 	ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
 		INFINIPATH_IBCC_LINKINITCMD_SHIFT;
+	dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
 	ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
 		   (unsigned long long) ibc);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
@@ -227,17 +219,26 @@
 		pd->port_cnt = 1;
 		/* The port 0 pkey table is used by the layer interface. */
 		pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+		pd->port_seq_cnt = 1;
 	}
 	return pd;
 }
 
-static int init_chip_first(struct ipath_devdata *dd,
-			   struct ipath_portdata **pdp)
+static int init_chip_first(struct ipath_devdata *dd)
 {
-	struct ipath_portdata *pd = NULL;
+	struct ipath_portdata *pd;
 	int ret = 0;
 	u64 val;
 
+	spin_lock_init(&dd->ipath_kernel_tid_lock);
+	spin_lock_init(&dd->ipath_user_tid_lock);
+	spin_lock_init(&dd->ipath_sendctrl_lock);
+	spin_lock_init(&dd->ipath_sdma_lock);
+	spin_lock_init(&dd->ipath_gpio_lock);
+	spin_lock_init(&dd->ipath_eep_st_lock);
+	spin_lock_init(&dd->ipath_sdepb_lock);
+	mutex_init(&dd->ipath_eep_lock);
+
 	/*
 	 * skip cfgports stuff because we are not allocating memory,
 	 * and we don't want problems if the portcnt changed due to
@@ -250,12 +251,14 @@
 	else if (ipath_cfgports <= dd->ipath_portcnt) {
 		dd->ipath_cfgports = ipath_cfgports;
 		ipath_dbg("Configured to use %u ports out of %u in chip\n",
-			  dd->ipath_cfgports, dd->ipath_portcnt);
+			  dd->ipath_cfgports, ipath_read_kreg32(dd,
+			  dd->ipath_kregs->kr_portcnt));
 	} else {
 		dd->ipath_cfgports = dd->ipath_portcnt;
 		ipath_dbg("Tried to configured to use %u ports; chip "
 			  "only supports %u\n", ipath_cfgports,
-			  dd->ipath_portcnt);
+			  ipath_read_kreg32(dd,
+				  dd->ipath_kregs->kr_portcnt));
 	}
 	/*
 	 * Allocate full portcnt array, rather than just cfgports, because
@@ -295,12 +298,9 @@
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
 	dd->ipath_piosize2k = val & ~0U;
 	dd->ipath_piosize4k = val >> 32;
-	/*
-	 * Note: the chips support a maximum MTU of 4096, but the driver
-	 * hasn't implemented this feature yet, so set the initial value
-	 * to 2048.
-	 */
-	dd->ipath_ibmtu = 2048;
+	if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
+		ipath_mtu4096 = 0; /* 4KB not supported by this chip */
+	dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
 	dd->ipath_piobcnt2k = val & ~0U;
 	dd->ipath_piobcnt4k = val >> 32;
@@ -328,43 +328,46 @@
 	else ipath_dbg("%u 2k piobufs @ %p\n",
 		       dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
 
-	spin_lock_init(&dd->ipath_tid_lock);
-	spin_lock_init(&dd->ipath_sendctrl_lock);
-	spin_lock_init(&dd->ipath_gpio_lock);
-	spin_lock_init(&dd->ipath_eep_st_lock);
-	mutex_init(&dd->ipath_eep_lock);
-
 done:
-	*pdp = pd;
 	return ret;
 }
 
 /**
  * init_chip_reset - re-initialize after a reset, or enable
  * @dd: the infinipath device
- * @pdp: output for port data
  *
  * sanity check at least some of the values after reset, and
  * ensure no receive or transmit (explictly, in case reset
  * failed
  */
-static int init_chip_reset(struct ipath_devdata *dd,
-			   struct ipath_portdata **pdp)
+static int init_chip_reset(struct ipath_devdata *dd)
 {
 	u32 rtmp;
+	int i;
+	unsigned long flags;
 
-	*pdp = dd->ipath_pd[0];
-	/* ensure chip does no sends or receives while we re-initialize */
-	dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl);
+	/*
+	 * ensure chip does no sends or receives, tail updates, or
+	 * pioavail updates while we re-initialize
+	 */
+	dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
+	for (i = 0; i < dd->ipath_portcnt; i++) {
+		clear_bit(dd->ipath_r_portenable_shift + i,
+			  &dd->ipath_rcvctrl);
+		clear_bit(dd->ipath_r_intravail_shift + i,
+			  &dd->ipath_rcvctrl);
+	}
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+		dd->ipath_rcvctrl);
+
+	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+	dd->ipath_sendctrl = 0U; /* no sdma, etc */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, dd->ipath_control);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
 
-	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
-	if (dd->ipath_portcnt != rtmp)
-		dev_info(&dd->pcidev->dev, "portcnt was %u before "
-			 "reset, now %u, using original\n",
-			 dd->ipath_portcnt, rtmp);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
+
 	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
 	if (rtmp != dd->ipath_rcvtidcnt)
 		dev_info(&dd->pcidev->dev, "tidcnt was %u before "
@@ -467,10 +470,10 @@
 	dd->ipath_physshadow = addrs;
 }
 
-static void enable_chip(struct ipath_devdata *dd,
-			struct ipath_portdata *pd, int reinit)
+static void enable_chip(struct ipath_devdata *dd, int reinit)
 {
 	u32 val;
+	u64 rcvmask;
 	unsigned long flags;
 	int i;
 
@@ -484,17 +487,28 @@
 	/* Enable PIO send, and update of PIOavail regs to memory. */
 	dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
 		INFINIPATH_S_PIOBUFAVAILUPD;
+
+	/*
+	 * Set the PIO avail update threshold to host memory
+	 * on chips that support it.
+	 */
+	if (dd->ipath_pioupd_thresh)
+		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+			<< INFINIPATH_S_UPDTHRESH_SHIFT;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
 	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
 
 	/*
-	 * enable port 0 receive, and receive interrupt.  other ports
-	 * done as user opens and inits them.
+	 * Enable kernel ports' receive and receive interrupt.
+	 * Other ports done as user opens and inits them.
 	 */
-	dd->ipath_rcvctrl = (1ULL << dd->ipath_r_tailupd_shift) |
-		(1ULL << dd->ipath_r_portenable_shift) |
-		(1ULL << dd->ipath_r_intravail_shift);
+	rcvmask = 1ULL;
+	dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
+		(rcvmask << dd->ipath_r_intravail_shift);
+	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
+		dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
+
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
@@ -505,16 +519,16 @@
 	dd->ipath_flags |= IPATH_INITTED;
 
 	/*
-	 * init our shadow copies of head from tail values, and write
-	 * head values to match.
+	 * Init our shadow copies of head from tail values,
+	 * and write head values to match.
 	 */
 	val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
-	(void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
+	ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
 
 	/* Initialize so we interrupt on next packet received */
-	(void)ipath_write_ureg(dd, ur_rcvhdrhead,
-			       dd->ipath_rhdrhead_intr_off |
-			       dd->ipath_pd[0]->port_head, 0);
+	ipath_write_ureg(dd, ur_rcvhdrhead,
+			 dd->ipath_rhdrhead_intr_off |
+			 dd->ipath_pd[0]->port_head, 0);
 
 	/*
 	 * by now pioavail updates to memory should have occurred, so
@@ -523,25 +537,26 @@
 	 * initial values of the generation bit correct.
 	 */
 	for (i = 0; i < dd->ipath_pioavregs; i++) {
-		__le64 val;
+		__le64 pioavail;
 
 		/*
 		 * Chip Errata bug 6641; even and odd qwords>3 are swapped.
 		 */
 		if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
-			val = dd->ipath_pioavailregs_dma[i ^ 1];
+			pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
 		else
-			val = dd->ipath_pioavailregs_dma[i];
-		dd->ipath_pioavailshadow[i] = le64_to_cpu(val);
+			pioavail = dd->ipath_pioavailregs_dma[i];
+		dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) |
+			(~dd->ipath_pioavailkernel[i] <<
+			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
 	}
 	/* can get counters, stats, etc. */
 	dd->ipath_flags |= IPATH_PRESENT;
 }
 
-static int init_housekeeping(struct ipath_devdata *dd,
-			     struct ipath_portdata **pdp, int reinit)
+static int init_housekeeping(struct ipath_devdata *dd, int reinit)
 {
-	char boardn[32];
+	char boardn[40];
 	int ret = 0;
 
 	/*
@@ -600,18 +615,9 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
 			 INFINIPATH_E_RESET);
 
-	if (reinit)
-		ret = init_chip_reset(dd, pdp);
-	else
-		ret = init_chip_first(dd, pdp);
-
-	if (ret)
-		goto done;
-
-	ipath_cdbg(VERBOSE, "Revision %llx (PCI %x), %u ports, %u tids, "
-		   "%u egrtids\n", (unsigned long long) dd->ipath_revision,
-		   dd->ipath_pcirev, dd->ipath_portcnt, dd->ipath_rcvtidcnt,
-		   dd->ipath_rcvegrcnt);
+	ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
+		   (unsigned long long) dd->ipath_revision,
+		   dd->ipath_pcirev);
 
 	if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
 	     INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
@@ -650,10 +656,39 @@
 
 	ipath_dbg("%s", dd->ipath_boardversion);
 
+	if (ret)
+		goto done;
+
+	if (reinit)
+		ret = init_chip_reset(dd);
+	else
+		ret = init_chip_first(dd);
+
 done:
 	return ret;
 }
 
+static void verify_interrupt(unsigned long opaque)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
+
+	if (!dd)
+		return; /* being torn down */
+
+	/*
+	 * If we don't have any interrupts, let the user know and
+	 * don't bother checking again.
+	 */
+	if (dd->ipath_int_counter == 0) {
+		if (!dd->ipath_f_intr_fallback(dd))
+			dev_err(&dd->pcidev->dev, "No interrupts detected, "
+				"not usable.\n");
+		else /* re-arm the timer to see if fallback works */
+			mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
+	} else
+		ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
+			dd->ipath_int_counter);
+}
 
 /**
  * ipath_init_chip - do the actual initialization sequence on the chip
@@ -676,11 +711,11 @@
 	u32 val32, kpiobufs;
 	u32 piobufs, uports;
 	u64 val;
-	struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
+	struct ipath_portdata *pd;
 	gfp_t gfp_flags = GFP_USER | __GFP_COMP;
 	unsigned long flags;
 
-	ret = init_housekeeping(dd, &pd, reinit);
+	ret = init_housekeeping(dd, reinit);
 	if (ret)
 		goto done;
 
@@ -700,7 +735,7 @@
 	 * we now use routines that backend onto __get_free_pages, the
 	 * rest would be wasted.
 	 */
-	dd->ipath_rcvhdrcnt = dd->ipath_rcvegrcnt;
+	dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
 			 dd->ipath_rcvhdrcnt);
 
@@ -731,8 +766,8 @@
 	if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
 		int i = (int) piobufs -
 			(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
-		if (i < 0)
-			i = 0;
+		if (i < 1)
+			i = 1;
 		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
 			 "%d for kernel leaves too few for %d user ports "
 			 "(%d each); using %u\n", kpiobufs,
@@ -751,24 +786,40 @@
 		ipath_dbg("allocating %u pbufs/port leaves %u unused, "
 			  "add to kernel\n", dd->ipath_pbufsport, val32);
 		dd->ipath_lastport_piobuf -= val32;
+		kpiobufs += val32;
 		ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
 			  dd->ipath_pbufsport, val32);
 	}
-	dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
+	dd->ipath_lastpioindex = 0;
+	dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
+	ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
 	ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
 		   "each for %u user ports\n", kpiobufs,
 		   piobufs, dd->ipath_pbufsport, uports);
+	if (dd->ipath_pioupd_thresh) {
+		if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
+			dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
+		if (kpiobufs < dd->ipath_pioupd_thresh)
+			dd->ipath_pioupd_thresh = kpiobufs;
+	}
 
-	dd->ipath_f_early_init(dd);
+	ret = dd->ipath_f_early_init(dd);
+	if (ret) {
+		ipath_dev_err(dd, "Early initialization failure\n");
+		goto done;
+	}
+
 	/*
-	 * cancel any possible active sends from early driver load.
+	 * Cancel any possible active sends from early driver load.
 	 * Follows early_init because some chips have to initialize
 	 * PIO buffers in early_init to avoid false parity errors.
 	 */
 	ipath_cancel_sends(dd, 0);
 
-	/* early_init sets rcvhdrentsize and rcvhdrsize, so this must be
-	 * done after early_init */
+	/*
+	 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
+	 * done after early_init.
+	 */
 	dd->ipath_hdrqlast =
 		dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
@@ -783,8 +834,8 @@
 			goto done;
 	}
 
-	(void)ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
-			       dd->ipath_pioavailregs_phys);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
+			 dd->ipath_pioavailregs_phys);
 	/*
 	 * this is to detect s/w errors, which the h/w works around by
 	 * ignoring the low 6 bits of address, if it wasn't aligned.
@@ -843,58 +894,65 @@
 	/* enable errors that are masked, at least this first time. */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
 			 ~dd->ipath_maskederrs);
-	dd->ipath_errormask = ipath_read_kreg64(dd,
-		dd->ipath_kregs->kr_errormask);
+	dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
+	dd->ipath_errormask =
+		ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
 	/* clear any interrupts up to this point (ints still not enabled) */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
 
+	dd->ipath_f_tidtemplate(dd);
+
 	/*
 	 * Set up the port 0 (kernel) rcvhdr q and egr TIDs.  If doing
 	 * re-init, the simplest way to handle this is to free
 	 * existing, and re-allocate.
 	 * Need to re-create rest of port 0 portdata as well.
 	 */
+	pd = dd->ipath_pd[0];
 	if (reinit) {
-		/* Alloc and init new ipath_portdata for port0,
+		struct ipath_portdata *npd;
+
+		/*
+		 * Alloc and init new ipath_portdata for port0,
 		 * Then free old pd. Could lead to fragmentation, but also
 		 * makes later support for hot-swap easier.
 		 */
-		struct ipath_portdata *npd;
 		npd = create_portdata0(dd);
 		if (npd) {
 			ipath_free_pddata(dd, pd);
-			dd->ipath_pd[0] = pd = npd;
+			dd->ipath_pd[0] = npd;
+			pd = npd;
 		} else {
-			ipath_dev_err(dd, "Unable to allocate portdata for"
-				      "  port 0, failing\n");
+			ipath_dev_err(dd, "Unable to allocate portdata"
+				      " for port 0, failing\n");
 			ret = -ENOMEM;
 			goto done;
 		}
 	}
-	dd->ipath_f_tidtemplate(dd);
 	ret = ipath_create_rcvhdrq(dd, pd);
-	if (!ret) {
-		dd->ipath_hdrqtailptr =
-			(volatile __le64 *)pd->port_rcvhdrtail_kvaddr;
+	if (!ret)
 		ret = create_port0_egr(dd);
-	}
-	if (ret)
-		ipath_dev_err(dd, "failed to allocate port 0 (kernel) "
+	if (ret) {
+		ipath_dev_err(dd, "failed to allocate kernel port's "
 			      "rcvhdrq and/or egr bufs\n");
+		goto done;
+	}
 	else
-		enable_chip(dd, pd, reinit);
+		enable_chip(dd, reinit);
 
-
-	if (!ret && !reinit) {
-	    /* used when we close a port, for DMA already in flight at close */
+	if (!reinit) {
+		/*
+		 * Used when we close a port, for DMA already in flight
+		 * at close.
+		 */
 		dd->ipath_dummy_hdrq = dma_alloc_coherent(
-			&dd->pcidev->dev, pd->port_rcvhdrq_size,
+			&dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
 			&dd->ipath_dummy_hdrq_phys,
 			gfp_flags);
-		if (!dd->ipath_dummy_hdrq ) {
+		if (!dd->ipath_dummy_hdrq) {
 			dev_info(&dd->pcidev->dev,
 				"Couldn't allocate 0x%lx bytes for dummy hdrq\n",
-				pd->port_rcvhdrq_size);
+				dd->ipath_pd[0]->port_rcvhdrq_size);
 			/* fallback to just 0'ing */
 			dd->ipath_dummy_hdrq_phys = 0UL;
 		}
@@ -906,7 +964,7 @@
 	 */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
 
-	if(!dd->ipath_stats_timer_active) {
+	if (!dd->ipath_stats_timer_active) {
 		/*
 		 * first init, or after an admin disable/enable
 		 * set up stats retrieval timer, even if we had errors
@@ -922,6 +980,16 @@
 		dd->ipath_stats_timer_active = 1;
 	}
 
+	/* Set up SendDMA if chip supports it */
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		ret = setup_sdma(dd);
+
+	/* Set up HoL state */
+	init_timer(&dd->ipath_hol_timer);
+	dd->ipath_hol_timer.function = ipath_hol_event;
+	dd->ipath_hol_timer.data = (unsigned long)dd;
+	dd->ipath_hol_state = IPATH_HOL_UP;
+
 done:
 	if (!ret) {
 		*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
@@ -934,6 +1002,20 @@
 					 0ULL);
 			/* chip is usable; mark it as initialized */
 			*dd->ipath_statusp |= IPATH_STATUS_INITTED;
+
+			/*
+			 * setup to verify we get an interrupt, and fallback
+			 * to an alternate if necessary and possible
+			 */
+			if (!reinit) {
+				init_timer(&dd->ipath_intrchk_timer);
+				dd->ipath_intrchk_timer.function =
+					verify_interrupt;
+				dd->ipath_intrchk_timer.data =
+					(unsigned long) dd;
+			}
+			dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
+			add_timer(&dd->ipath_intrchk_timer);
 		} else
 			ipath_dev_err(dd, "No interrupts enabled, couldn't "
 				      "setup interrupt address\n");
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 92e58c9..1b58f47 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -32,6 +32,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
@@ -59,9 +60,11 @@
 	dev_info(&dd->pcidev->dev,
 		"Rewrite PIO buffer %u, to recover from parity error\n",
 		pnum);
-	*pbuf = dwcnt+1; /* no flush required, since already in freeze */
-	while(--dwcnt)
-		*pbuf++ = 0;
+
+	/* no flush required, since already in freeze */
+	writel(dwcnt + 1, pbuf);
+	while (--dwcnt)
+		writel(0, pbuf++);
 }
 
 /*
@@ -70,7 +73,7 @@
  * If rewrite is true, and bits are set in the sendbufferror registers,
  * we'll write to the buffer, for error recovery on parity errors.
  */
-static void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
 {
 	u32 piobcnt;
 	unsigned long sbuf[4];
@@ -84,12 +87,14 @@
 		dd, dd->ipath_kregs->kr_sendbuffererror);
 	sbuf[1] = ipath_read_kreg64(
 		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
-	if (piobcnt > 128) {
+	if (piobcnt > 128)
 		sbuf[2] = ipath_read_kreg64(
 			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
+	if (piobcnt > 192)
 		sbuf[3] = ipath_read_kreg64(
 			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
-	}
+	else
+		sbuf[3] = 0;
 
 	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
 		int i;
@@ -254,24 +259,20 @@
 }
 
 /* return the strings for the most common link states */
-static char *ib_linkstate(u32 linkstate)
+static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
 {
 	char *ret;
+	u32 state;
 
-	switch (linkstate) {
-	case IPATH_IBSTATE_INIT:
+	state = ipath_ib_state(dd, ibcs);
+	if (state == dd->ib_init)
 		ret = "Init";
-		break;
-	case IPATH_IBSTATE_ARM:
+	else if (state == dd->ib_arm)
 		ret = "Arm";
-		break;
-	case IPATH_IBSTATE_ACTIVE:
+	else if (state == dd->ib_active)
 		ret = "Active";
-		break;
-	default:
+	else
 		ret = "Down";
-	}
-
 	return ret;
 }
 
@@ -286,103 +287,172 @@
 }
 
 static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
-				     ipath_err_t errs, int noprint)
+				     ipath_err_t errs)
 {
-	u64 val;
-	u32 ltstate, lstate;
+	u32 ltstate, lstate, ibstate, lastlstate;
+	u32 init = dd->ib_init;
+	u32 arm = dd->ib_arm;
+	u32 active = dd->ib_active;
+	const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+
+	lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
+	ibstate = ipath_ib_state(dd, ibcs);
+	/* linkstate at last interrupt */
+	lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
+	ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */
 
 	/*
-	 * even if diags are enabled, we want to notice LINKINIT, etc.
-	 * We just don't want to change the LED state, or
-	 * dd->ipath_kregs->kr_ibcctrl
+	 * Since going into a recovery state causes the link state to go
+	 * down and since recovery is transitory, it is better if we "miss"
+	 * ever seeing the link training state go into recovery (i.e.,
+	 * ignore this transition for link state special handling purposes)
+	 * without even updating ipath_lastibcstat.
 	 */
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-	lstate = val & IPATH_IBSTATE_MASK;
+	if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
+	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
+	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
+		goto done;
 
 	/*
-	 * this is confusing enough when it happens that I want to always put it
-	 * on the console and in the logs.  If it was a requested state change,
-	 * we'll have already cleared the flags, so we won't print this warning
+	 * if linkstate transitions into INIT from any of the various down
+	 * states, or if it transitions from any of the up (INIT or better)
+	 * states into any of the down states (except link recovery), then
+	 * call the chip-specific code to take appropriate actions.
 	 */
-	if ((lstate != IPATH_IBSTATE_ARM && lstate != IPATH_IBSTATE_ACTIVE)
-		&& (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
-		dev_info(&dd->pcidev->dev, "Link state changed from %s to %s\n",
-				 (dd->ipath_flags & IPATH_LINKARMED) ? "ARM" : "ACTIVE",
-				 ib_linkstate(lstate));
-		/*
-		 * Flush all queued sends when link went to DOWN or INIT,
-		 * to be sure that they don't block SMA and other MAD packets
-		 */
-		ipath_cancel_sends(dd, 1);
-	}
-	else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
-	    lstate == IPATH_IBSTATE_ACTIVE) {
-		/*
-		 * only print at SMA if there is a change, debug if not
-		 * (sometimes we want to know that, usually not).
-		 */
-		if (lstate == ((unsigned) dd->ipath_lastibcstat
-			       & IPATH_IBSTATE_MASK)) {
-			ipath_dbg("Status change intr but no change (%s)\n",
-				  ib_linkstate(lstate));
+	if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
+		lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
+		/* transitioned to UP */
+		if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
+			/* link came up, so we must no longer be disabled */
+			dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
+			ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
+			goto skip_ibchange; /* chip-code handled */
 		}
-		else
-			ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
-				   "was %s\n", dd->ipath_unit,
-				   ib_linkstate(lstate),
-				   ib_linkstate((unsigned)
-						dd->ipath_lastibcstat
-						& IPATH_IBSTATE_MASK));
+	} else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
+		(dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
+		ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
+		ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+		int handled;
+		handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
+		dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
+		if (handled) {
+			ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
+			goto skip_ibchange; /* chip-code handled */
+		}
 	}
-	else {
-		lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
-		if (lstate == IPATH_IBSTATE_INIT ||
-		    lstate == IPATH_IBSTATE_ARM ||
-		    lstate == IPATH_IBSTATE_ACTIVE)
-			ipath_cdbg(VERBOSE, "Unit %u link state down"
-				   " (state 0x%x), from %s\n",
-				   dd->ipath_unit,
-				   (u32)val & IPATH_IBSTATE_MASK,
-				   ib_linkstate(lstate));
-		else
-			ipath_cdbg(VERBOSE, "Unit %u link state changed "
-				   "to 0x%x from down (%x)\n",
-				   dd->ipath_unit, (u32) val, lstate);
+
+	/*
+	 * Significant enough to always print and get into logs, if it was
+	 * unexpected.  If it was a requested state change, we'll have
+	 * already cleared the flags, so we won't print this warning
+	 */
+	if ((ibstate != arm && ibstate != active) &&
+	    (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
+		dev_info(&dd->pcidev->dev, "Link state changed from %s "
+			 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
+			 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
 	}
-	ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
-	lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
-		INFINIPATH_IBCS_LINKSTATE_MASK;
 
 	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
 	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-		u32 last_ltstate;
-
+		u32 lastlts;
+		lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
 		/*
-		 * Ignore cycling back and forth from Polling.Active
-		 * to Polling.Quiet while waiting for the other end of
-		 * the link to come up. We will cycle back and forth
-		 * between them if no cable is plugged in,
-		 * the other device is powered off or disabled, etc.
+		 * Ignore cycling back and forth from Polling.Active to
+		 * Polling.Quiet while waiting for the other end of the link
+		 * to come up, except to try and decide if we are connected
+		 * to a live IB device or not.  We will cycle back and
+		 * forth between them if no cable is plugged in, the other
+		 * device is powered off or disabled, etc.
 		 */
-		last_ltstate = (dd->ipath_lastibcstat >>
-				INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)
-			& INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
-		if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE
-		    || last_ltstate ==
-		    INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-			if (dd->ipath_ibpollcnt > 40) {
+		if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
+		    lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
+			if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
+			     (++dd->ipath_ibpollcnt == 40)) {
 				dd->ipath_flags |= IPATH_NOCABLE;
 				*dd->ipath_statusp |=
 					IPATH_STATUS_IB_NOCABLE;
-			} else
-				dd->ipath_ibpollcnt++;
+				ipath_cdbg(LINKVERB, "Set NOCABLE\n");
+			}
+			ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
+				ipath_ibcstatus_str[ltstate], ibstate);
 			goto skip_ibchange;
 		}
 	}
-	dd->ipath_ibpollcnt = 0;	/* some state other than 2 or 3 */
+
+	dd->ipath_ibpollcnt = 0; /* not poll*, now */
 	ipath_stats.sps_iblink++;
-	if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+
+	if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
+		u64 linkrecov;
+		linkrecov = ipath_snap_cntr(dd,
+			dd->ipath_cregs->cr_iblinkerrrecovcnt);
+		if (linkrecov != dd->ipath_lastlinkrecov) {
+			ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
+				ibcs, ib_linkstate(dd, ibcs),
+				ipath_ibcstatus_str[ltstate],
+				linkrecov);
+			/* and no more until active again */
+			dd->ipath_lastlinkrecov = 0;
+			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+			goto skip_ibchange;
+		}
+	}
+
+	if (ibstate == init || ibstate == arm || ibstate == active) {
+		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
+		if (ibstate == init || ibstate == arm) {
+			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+			if (dd->ipath_flags & IPATH_LINKACTIVE)
+				signal_ib_event(dd, IB_EVENT_PORT_ERR);
+		}
+		if (ibstate == arm) {
+			dd->ipath_flags |= IPATH_LINKARMED;
+			dd->ipath_flags &= ~(IPATH_LINKUNK |
+				IPATH_LINKINIT | IPATH_LINKDOWN |
+				IPATH_LINKACTIVE | IPATH_NOCABLE);
+			ipath_hol_down(dd);
+		} else  if (ibstate == init) {
+			/*
+			 * set INIT and DOWN.  Down is checked by
+			 * most of the other code, but INIT is
+			 * useful to know in a few places.
+			 */
+			dd->ipath_flags |= IPATH_LINKINIT |
+				IPATH_LINKDOWN;
+			dd->ipath_flags &= ~(IPATH_LINKUNK |
+				IPATH_LINKARMED | IPATH_LINKACTIVE |
+				IPATH_NOCABLE);
+			ipath_hol_down(dd);
+		} else {  /* active */
+			dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
+				dd->ipath_cregs->cr_iblinkerrrecovcnt);
+			*dd->ipath_statusp |=
+				IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
+			dd->ipath_flags |= IPATH_LINKACTIVE;
+			dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
+				| IPATH_LINKDOWN | IPATH_LINKARMED |
+				IPATH_NOCABLE);
+			if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+				ipath_restart_sdma(dd);
+			signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
+			/* LED active not handled in chip _f_updown */
+			dd->ipath_f_setextled(dd, lstate, ltstate);
+			ipath_hol_up(dd);
+		}
+
+		/*
+		 * print after we've already done the work, so as not to
+		 * delay the state changes and notifications, for debugging
+		 */
+		if (lstate == lastlstate)
+			ipath_cdbg(LINKVERB, "Unchanged from last: %s "
+				"(%x)\n", ib_linkstate(dd, ibcs), ibstate);
+		else
+			ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
+				  dd->ipath_unit, ib_linkstate(dd, ibcs),
+				  ipath_ibcstatus_str[ltstate],  ibstate);
+	} else { /* down */
 		if (dd->ipath_flags & IPATH_LINKACTIVE)
 			signal_ib_event(dd, IB_EVENT_PORT_ERR);
 		dd->ipath_flags |= IPATH_LINKDOWN;
@@ -391,69 +461,28 @@
 				     IPATH_LINKARMED);
 		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
 		dd->ipath_lli_counter = 0;
-		if (!noprint) {
-			if (((dd->ipath_lastibcstat >>
-			      INFINIPATH_IBCS_LINKSTATE_SHIFT) &
-			     INFINIPATH_IBCS_LINKSTATE_MASK)
-			    == INFINIPATH_IBCS_L_STATE_ACTIVE)
-				/* if from up to down be more vocal */
-				ipath_cdbg(VERBOSE,
-					   "Unit %u link now down (%s)\n",
-					   dd->ipath_unit,
-					   ipath_ibcstatus_str[ltstate]);
-			else
-				ipath_cdbg(VERBOSE, "Unit %u link is "
-					   "down (%s)\n", dd->ipath_unit,
-					   ipath_ibcstatus_str[ltstate]);
-		}
 
-		dd->ipath_f_setextled(dd, lstate, ltstate);
-	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) {
-		dd->ipath_flags |= IPATH_LINKACTIVE;
-		dd->ipath_flags &=
-			~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN |
-			  IPATH_LINKARMED | IPATH_NOCABLE);
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
-		*dd->ipath_statusp |=
-			IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
-		dd->ipath_f_setextled(dd, lstate, ltstate);
-		signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
-	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
-		if (dd->ipath_flags & IPATH_LINKACTIVE)
-			signal_ib_event(dd, IB_EVENT_PORT_ERR);
-		/*
-		 * set INIT and DOWN.  Down is checked by most of the other
-		 * code, but INIT is useful to know in a few places.
-		 */
-		dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN;
-		dd->ipath_flags &=
-			~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED
-			  | IPATH_NOCABLE);
-		*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
-					| IPATH_STATUS_IB_READY);
-		dd->ipath_f_setextled(dd, lstate, ltstate);
-	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
-		if (dd->ipath_flags & IPATH_LINKACTIVE)
-			signal_ib_event(dd, IB_EVENT_PORT_ERR);
-		dd->ipath_flags |= IPATH_LINKARMED;
-		dd->ipath_flags &=
-			~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
-			  IPATH_LINKACTIVE | IPATH_NOCABLE);
-		*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
-					| IPATH_STATUS_IB_READY);
-		dd->ipath_f_setextled(dd, lstate, ltstate);
-	} else {
-		if (!noprint)
-			ipath_dbg("IBstatuschange unit %u: %s (%x)\n",
-				  dd->ipath_unit,
-				  ipath_ibcstatus_str[ltstate], ltstate);
+		if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
+			ipath_cdbg(VERBOSE, "Unit %u link state down "
+				   "(state 0x%x), from %s\n",
+				   dd->ipath_unit, lstate,
+				   ib_linkstate(dd, dd->ipath_lastibcstat));
+		else
+			ipath_cdbg(LINKVERB, "Unit %u link state changed "
+				   "to %s (0x%x) from down (%x)\n",
+				   dd->ipath_unit,
+				   ipath_ibcstatus_str[ltstate],
+				   ibstate, lastlstate);
 	}
+
 skip_ibchange:
-	dd->ipath_lastibcstat = val;
+	dd->ipath_lastibcstat = ibcs;
+done:
+	return;
 }
 
 static void handle_supp_msgs(struct ipath_devdata *dd,
-			     unsigned supp_msgs, char *msg, int msgsz)
+			     unsigned supp_msgs, char *msg, u32 msgsz)
 {
 	/*
 	 * Print the message unless it's ibc status change only, which
@@ -461,12 +490,19 @@
 	 */
 	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
 		int iserr;
-		iserr = ipath_decode_err(msg, msgsz,
+		ipath_err_t mask;
+		iserr = ipath_decode_err(dd, msg, msgsz,
 					 dd->ipath_lasterror &
 					 ~INFINIPATH_E_IBSTATUSCHANGED);
-		if (dd->ipath_lasterror &
-			~(INFINIPATH_E_RRCVEGRFULL |
-			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+
+		mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+			INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
+
+		/* if we're in debug, then don't mask SDMADISABLED msgs */
+		if (ipath_debug & __IPATH_DBG)
+			mask &= ~INFINIPATH_E_SDMADISABLED;
+
+		if (dd->ipath_lasterror & ~mask)
 			ipath_dev_err(dd, "Suppressed %u messages for "
 				      "fast-repeating errors (%s) (%llx)\n",
 				      supp_msgs, msg,
@@ -493,7 +529,7 @@
 
 static unsigned handle_frequent_errors(struct ipath_devdata *dd,
 				       ipath_err_t errs, char *msg,
-				       int msgsz, int *noprint)
+				       u32 msgsz, int *noprint)
 {
 	unsigned long nc;
 	static unsigned long nextmsg_time;
@@ -523,19 +559,125 @@
 	return supp_msgs;
 }
 
+static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
+{
+	unsigned long flags;
+	int expected;
+
+	if (ipath_debug & __IPATH_DBG) {
+		char msg[128];
+		ipath_decode_err(dd, msg, sizeof msg, errs &
+			INFINIPATH_E_SDMAERRS);
+		ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
+	}
+	if (ipath_debug & __IPATH_VERBDBG) {
+		unsigned long tl, hd, status, lengen;
+		tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
+		hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
+		status = ipath_read_kreg64(dd
+			, dd->ipath_kregs->kr_senddmastatus);
+		lengen = ipath_read_kreg64(dd,
+			dd->ipath_kregs->kr_senddmalengen);
+		ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
+			"lengen 0x%lx\n", tl, hd, status, lengen);
+	}
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+	__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+	expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+	if (!expected)
+		ipath_cancel_sends(dd, 1);
+}
+
+static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
+{
+	unsigned long flags;
+	int expected;
+
+	if ((istat & INFINIPATH_I_SDMAINT) &&
+	    !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+		ipath_sdma_intr(dd);
+
+	if (istat & INFINIPATH_I_SDMADISABLED) {
+		expected = test_bit(IPATH_SDMA_ABORTING,
+			&dd->ipath_sdma_status);
+		ipath_dbg("%s SDmaDisabled intr\n",
+			expected ? "expected" : "unexpected");
+		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+		if (!expected)
+			ipath_cancel_sends(dd, 1);
+		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+	}
+}
+
+static int handle_hdrq_full(struct ipath_devdata *dd)
+{
+	int chkerrpkts = 0;
+	u32 hd, tl;
+	u32 i;
+
+	ipath_stats.sps_hdrqfull++;
+	for (i = 0; i < dd->ipath_cfgports; i++) {
+		struct ipath_portdata *pd = dd->ipath_pd[i];
+
+		if (i == 0) {
+			/*
+			 * For kernel receive queues, we just want to know
+			 * if there are packets in the queue that we can
+			 * process.
+			 */
+			if (pd->port_head != ipath_get_hdrqtail(pd))
+				chkerrpkts |= 1 << i;
+			continue;
+		}
+
+		/* Skip if user context is not open */
+		if (!pd || !pd->port_cnt)
+			continue;
+
+		/* Don't report the same point multiple times. */
+		if (dd->ipath_flags & IPATH_NODMA_RTAIL)
+			tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
+		else
+			tl = ipath_get_rcvhdrtail(pd);
+		if (tl == pd->port_lastrcvhdrqtail)
+			continue;
+
+		hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
+		if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
+			pd->port_lastrcvhdrqtail = tl;
+			pd->port_hdrqfull++;
+			/* flush hdrqfull so that poll() sees it */
+			wmb();
+			wake_up_interruptible(&pd->port_wait);
+		}
+	}
+
+	return chkerrpkts;
+}
+
 static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 {
 	char msg[128];
 	u64 ignore_this_time = 0;
-	int i, iserr = 0;
+	u64 iserr = 0;
 	int chkerrpkts = 0, noprint = 0;
 	unsigned supp_msgs;
 	int log_idx;
 
-	supp_msgs = handle_frequent_errors(dd, errs, msg, sizeof msg, &noprint);
+	/*
+	 * don't report errors that are masked, either at init
+	 * (not set in ipath_errormask), or temporarily (set in
+	 * ipath_maskederrs)
+	 */
+	errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
 
-	/* don't report errors that are masked */
-	errs &= ~dd->ipath_maskederrs;
+	supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
+		&noprint);
 
 	/* do these first, they are most important */
 	if (errs & INFINIPATH_E_HARDWARE) {
@@ -550,6 +692,9 @@
 		}
 	}
 
+	if (errs & INFINIPATH_E_SDMAERRS)
+		handle_sdma_errors(dd, errs);
+
 	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
 		ipath_dev_err(dd, "error interrupt with unknown errors "
 			      "%llx set\n", (unsigned long long)
@@ -580,18 +725,19 @@
 		 * ones on this particular interrupt, which also isn't great
 		 */
 		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
+
 		dd->ipath_errormask &= ~dd->ipath_maskederrs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-			dd->ipath_errormask);
-		s_iserr = ipath_decode_err(msg, sizeof msg,
-			dd->ipath_maskederrs);
+				 dd->ipath_errormask);
+		s_iserr = ipath_decode_err(dd, msg, sizeof msg,
+					   dd->ipath_maskederrs);
 
 		if (dd->ipath_maskederrs &
-			~(INFINIPATH_E_RRCVEGRFULL |
-			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+		    ~(INFINIPATH_E_RRCVEGRFULL |
+		      INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
 			ipath_dev_err(dd, "Temporarily disabling "
 			    "error(s) %llx reporting; too frequent (%s)\n",
-				(unsigned long long)dd->ipath_maskederrs,
+				(unsigned long long) dd->ipath_maskederrs,
 				msg);
 		else {
 			/*
@@ -633,26 +779,43 @@
 			  INFINIPATH_E_IBSTATUSCHANGED);
 	}
 
-	/* likely due to cancel, so suppress */
+	if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
+		dd->ipath_spectriggerhit++;
+		ipath_dbg("%lu special trigger hits\n",
+			dd->ipath_spectriggerhit);
+	}
+
+	/* likely due to cancel; so suppress message unless verbose */
 	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
 		dd->ipath_lastcancel > jiffies) {
-		ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
+		/* armlaunch takes precedence; it often causes both. */
+		ipath_cdbg(VERBOSE,
+			"Suppressed %s error (%llx) after sendbuf cancel\n",
+			(errs &  INFINIPATH_E_SPIOARMLAUNCH) ?
+			"armlaunch" : "sendpktlen", (unsigned long long)errs);
 		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
 	}
 
 	if (!errs)
 		return 0;
 
-	if (!noprint)
+	if (!noprint) {
+		ipath_err_t mask;
 		/*
-		 * the ones we mask off are handled specially below or above
+		 * The ones we mask off are handled specially below
+		 * or above.  Also mask SDMADISABLED by default as it
+		 * is too chatty.
 		 */
-		ipath_decode_err(msg, sizeof msg,
-				 errs & ~(INFINIPATH_E_IBSTATUSCHANGED |
-					  INFINIPATH_E_RRCVEGRFULL |
-					  INFINIPATH_E_RRCVHDRFULL |
-					  INFINIPATH_E_HARDWARE));
-	else
+		mask = INFINIPATH_E_IBSTATUSCHANGED |
+			INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+			INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
+
+		/* if we're in debug, then don't mask SDMADISABLED msgs */
+		if (ipath_debug & __IPATH_DBG)
+			mask &= ~INFINIPATH_E_SDMADISABLED;
+
+		ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
+	} else
 		/* so we don't need if (!noprint) at strlcat's below */
 		*msg = 0;
 
@@ -677,40 +840,8 @@
 	 * fast_stats, no more than every 5 seconds, user ports get printed
 	 * on close
 	 */
-	if (errs & INFINIPATH_E_RRCVHDRFULL) {
-		u32 hd, tl;
-		ipath_stats.sps_hdrqfull++;
-		for (i = 0; i < dd->ipath_cfgports; i++) {
-			struct ipath_portdata *pd = dd->ipath_pd[i];
-			if (i == 0) {
-				hd = pd->port_head;
-				tl = (u32) le64_to_cpu(
-					*dd->ipath_hdrqtailptr);
-			} else if (pd && pd->port_cnt &&
-				   pd->port_rcvhdrtail_kvaddr) {
-				/*
-				 * don't report same point multiple times,
-				 * except kernel
-				 */
-				tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
-				if (tl == pd->port_lastrcvhdrqtail)
-					continue;
-				hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
-						       i);
-			} else
-				continue;
-			if (hd == (tl + 1) ||
-			    (!hd && tl == dd->ipath_hdrqlast)) {
-				if (i == 0)
-					chkerrpkts = 1;
-				pd->port_lastrcvhdrqtail = tl;
-				pd->port_hdrqfull++;
-				/* flush hdrqfull so that poll() sees it */
-				wmb();
-				wake_up_interruptible(&pd->port_wait);
-			}
-		}
-	}
+	if (errs & INFINIPATH_E_RRCVHDRFULL)
+		chkerrpkts |= handle_hdrq_full(dd);
 	if (errs & INFINIPATH_E_RRCVEGRFULL) {
 		struct ipath_portdata *pd = dd->ipath_pd[0];
 
@@ -721,9 +852,8 @@
 		 * vs user)
 		 */
 		ipath_stats.sps_etidfull++;
-		if (pd->port_head !=
-		    (u32) le64_to_cpu(*dd->ipath_hdrqtailptr))
-			chkerrpkts = 1;
+		if (pd->port_head != ipath_get_hdrqtail(pd))
+			chkerrpkts |= 1;
 	}
 
 	/*
@@ -741,16 +871,13 @@
 		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
 				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
 		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-		if (!noprint) {
-			u64 st = ipath_read_kreg64(
-				dd, dd->ipath_kregs->kr_ibcstatus);
 
-			ipath_dbg("Lost link, link now down (%s)\n",
-				  ipath_ibcstatus_str[st & 0xf]);
-		}
+		ipath_dbg("Lost link, link now down (%s)\n",
+			ipath_ibcstatus_str[ipath_read_kreg64(dd,
+			dd->ipath_kregs->kr_ibcstatus) & 0xf]);
 	}
 	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
-		handle_e_ibstatuschanged(dd, errs, noprint);
+		handle_e_ibstatuschanged(dd, errs);
 
 	if (errs & INFINIPATH_E_RESET) {
 		if (!noprint)
@@ -765,9 +892,6 @@
 	if (!noprint && *msg) {
 		if (iserr)
 			ipath_dev_err(dd, "%s error\n", msg);
-		else
-			dev_info(&dd->pcidev->dev, "%s packet problems\n",
-				msg);
 	}
 	if (dd->ipath_state_wanted & dd->ipath_flags) {
 		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
@@ -779,7 +903,6 @@
 	return chkerrpkts;
 }
 
-
 /*
  * try to cleanup as much as possible for anything that might have gone
  * wrong while in freeze mode, such as pio buffers being written by user
@@ -796,8 +919,7 @@
 void ipath_clear_freeze(struct ipath_devdata *dd)
 {
 	int i, im;
-	__le64 val;
-	unsigned long flags;
+	u64 val;
 
 	/* disable error interrupts, to avoid confusion */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
@@ -816,14 +938,7 @@
 			 dd->ipath_control);
 
 	/* ensure pio avail updates continue */
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	ipath_force_pio_avail_update(dd);
 
 	/*
 	 * We just enabled pioavailupdate, so dma copy is almost certainly
@@ -831,10 +946,13 @@
 	 */
 	for (i = 0; i < dd->ipath_pioavregs; i++) {
 		/* deal with 6110 chip bug */
-		im = i > 3 ? i ^ 1 : i;
+		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+			i ^ 1 : i;
 		val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
-		dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
-			= le64_to_cpu(val);
+		dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
+		dd->ipath_pioavailshadow[i] = val |
+			(~dd->ipath_pioavailkernel[i] <<
+			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
 	}
 
 	/*
@@ -950,7 +1068,7 @@
  * process was waiting for a packet to arrive, and didn't want
  * to poll
  */
-static void handle_urcv(struct ipath_devdata *dd, u32 istat)
+static void handle_urcv(struct ipath_devdata *dd, u64 istat)
 {
 	u64 portr;
 	int i;
@@ -966,12 +1084,13 @@
 	 * and ipath_poll_next()...
 	 */
 	rmb();
-	portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
-		 dd->ipath_i_rcvavail_mask)
-		| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
-		   dd->ipath_i_rcvurg_mask);
+	portr = ((istat >> dd->ipath_i_rcvavail_shift) &
+		 dd->ipath_i_rcvavail_mask) |
+		((istat >> dd->ipath_i_rcvurg_shift) &
+		 dd->ipath_i_rcvurg_mask);
 	for (i = 1; i < dd->ipath_cfgports; i++) {
 		struct ipath_portdata *pd = dd->ipath_pd[i];
+
 		if (portr & (1 << i) && pd && pd->port_cnt) {
 			if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
 					       &pd->port_flag)) {
@@ -988,7 +1107,7 @@
 	}
 	if (rcvdint) {
 		/* only want to take one interrupt, so turn off the rcv
-		 * interrupt for all the ports that we did the wakeup on
+		 * interrupt for all the ports that we set the rcv_waiting
 		 * (but never for kernel port)
 		 */
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
@@ -999,12 +1118,11 @@
 irqreturn_t ipath_intr(int irq, void *data)
 {
 	struct ipath_devdata *dd = data;
-	u32 istat, chk0rcv = 0;
+	u64 istat, chk0rcv = 0;
 	ipath_err_t estat = 0;
 	irqreturn_t ret;
 	static unsigned unexpected = 0;
-	static const u32 port0rbits = (1U<<INFINIPATH_I_RCVAVAIL_SHIFT) |
-		 (1U<<INFINIPATH_I_RCVURG_SHIFT);
+	u64 kportrbits;
 
 	ipath_stats.sps_ints++;
 
@@ -1053,17 +1171,17 @@
 
 	if (unlikely(istat & ~dd->ipath_i_bitsextant))
 		ipath_dev_err(dd,
-			      "interrupt with unknown interrupts %x set\n",
-			      istat & (u32) ~ dd->ipath_i_bitsextant);
-	else
-		ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
+			      "interrupt with unknown interrupts %Lx set\n",
+			      istat & ~dd->ipath_i_bitsextant);
+	else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
+		ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat);
 
-	if (unlikely(istat & INFINIPATH_I_ERROR)) {
+	if (istat & INFINIPATH_I_ERROR) {
 		ipath_stats.sps_errints++;
 		estat = ipath_read_kreg64(dd,
 					  dd->ipath_kregs->kr_errorstatus);
 		if (!estat)
-			dev_info(&dd->pcidev->dev, "error interrupt (%x), "
+			dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
 				 "but no error bits set!\n", istat);
 		else if (estat == -1LL)
 			/*
@@ -1073,9 +1191,7 @@
 			ipath_dev_err(dd, "Read of error status failed "
 				      "(all bits set); ignoring\n");
 		else
-			if (handle_errors(dd, estat))
-				/* force calling ipath_kreceive() */
-				chk0rcv = 1;
+			chk0rcv |= handle_errors(dd, estat);
 	}
 
 	if (istat & INFINIPATH_I_GPIO) {
@@ -1093,8 +1209,7 @@
 
 		gpiostatus = ipath_read_kreg32(
 			dd, dd->ipath_kregs->kr_gpio_status);
-		/* First the error-counter case.
-		 */
+		/* First the error-counter case. */
 		if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
 		    (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
 			/* want to clear the bits we see asserted. */
@@ -1156,7 +1271,6 @@
 					(u64) to_clear);
 		}
 	}
-	chk0rcv |= istat & port0rbits;
 
 	/*
 	 * Clear the interrupt bits we found set, unless they are receive
@@ -1169,22 +1283,25 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
 
 	/*
-	 * handle port0 receive  before checking for pio buffers available,
-	 * since receives can overflow; piobuf waiters can afford a few
-	 * extra cycles, since they were waiting anyway, and user's waiting
-	 * for receive are at the bottom.
+	 * Handle kernel receive queues before checking for pio buffers
+	 * available since receives can overflow; piobuf waiters can afford
+	 * a few extra cycles, since they were waiting anyway, and user's
+	 * waiting for receive are at the bottom.
 	 */
-	if (chk0rcv) {
+	kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
+		(1ULL << dd->ipath_i_rcvurg_shift);
+	if (chk0rcv || (istat & kportrbits)) {
+		istat &= ~kportrbits;
 		ipath_kreceive(dd->ipath_pd[0]);
-		istat &= ~port0rbits;
 	}
 
-	if (istat & ((dd->ipath_i_rcvavail_mask <<
-		      INFINIPATH_I_RCVAVAIL_SHIFT)
-		     | (dd->ipath_i_rcvurg_mask <<
-			INFINIPATH_I_RCVURG_SHIFT)))
+	if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
+		     (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
 		handle_urcv(dd, istat);
 
+	if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
+		handle_sdma_intr(dd, istat);
+
 	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
 		unsigned long flags;
 
@@ -1195,7 +1312,10 @@
 		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
 
-		handle_layer_pioavail(dd);
+		if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+			handle_layer_pioavail(dd);
+		else
+			ipath_dbg("unexpected BUFAVAIL intr\n");
 	}
 
 	ret = IRQ_HANDLED;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index ecf3f7f..5863cbe 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -1,7 +1,7 @@
 #ifndef _IPATH_KERNEL_H
 #define _IPATH_KERNEL_H
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -42,6 +42,8 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
 #include <asm/io.h>
 #include <rdma/ib_verbs.h>
 
@@ -175,9 +177,13 @@
 	u16 poll_type;
 	/* port rcvhdrq head offset */
 	u32 port_head;
+	/* receive packet sequence counter */
+	u32 port_seq_cnt;
 };
 
 struct sk_buff;
+struct ipath_sge_state;
+struct ipath_verbs_txreq;
 
 /*
  * control information for layered drivers
@@ -191,6 +197,40 @@
 	dma_addr_t phys;
 };
 
+struct ipath_sdma_txreq {
+	int                 flags;
+	int                 sg_count;
+	union {
+		struct scatterlist *sg;
+		void *map_addr;
+	};
+	void              (*callback)(void *, int);
+	void               *callback_cookie;
+	int                 callback_status;
+	u16                 start_idx;  /* sdma private */
+	u16                 next_descq_idx;  /* sdma private */
+	struct list_head    list;       /* sdma private */
+};
+
+struct ipath_sdma_desc {
+	__le64 qw[2];
+};
+
+#define IPATH_SDMA_TXREQ_F_USELARGEBUF  0x1
+#define IPATH_SDMA_TXREQ_F_HEADTOHOST   0x2
+#define IPATH_SDMA_TXREQ_F_INTREQ       0x4
+#define IPATH_SDMA_TXREQ_F_FREEBUF      0x8
+#define IPATH_SDMA_TXREQ_F_FREEDESC     0x10
+#define IPATH_SDMA_TXREQ_F_VL15         0x20
+
+#define IPATH_SDMA_TXREQ_S_OK        0
+#define IPATH_SDMA_TXREQ_S_SENDERROR 1
+#define IPATH_SDMA_TXREQ_S_ABORTED   2
+#define IPATH_SDMA_TXREQ_S_SHUTDOWN  3
+
+/* max dwords in small buffer packet */
+#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
+
 /*
  * Possible IB config parameters for ipath_f_get/set_ib_cfg()
  */
@@ -221,11 +261,6 @@
 	unsigned long ipath_physaddr;
 	/* base of memory alloced for ipath_kregbase, for free */
 	u64 *ipath_kregalloc;
-	/*
-	 * virtual address where port0 rcvhdrqtail updated for this unit.
-	 * only written to by the chip, not the driver.
-	 */
-	volatile __le64 *ipath_hdrqtailptr;
 	/* ipath_cfgports pointers */
 	struct ipath_portdata **ipath_pd;
 	/* sk_buffs used by port 0 eager receive queue */
@@ -283,6 +318,7 @@
 	/* per chip actions needed for IB Link up/down changes */
 	int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
 
+	unsigned ipath_lastegr_idx;
 	struct ipath_ibdev *verbs_dev;
 	struct timer_list verbs_timer;
 	/* total dwords sent (summed from counter) */
@@ -309,6 +345,7 @@
 	ipath_err_t ipath_lasthwerror;
 	/* errors masked because they occur too fast */
 	ipath_err_t ipath_maskederrs;
+	u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
 	/* time in jiffies at which to re-enable maskederrs */
 	unsigned long ipath_unmasktime;
 	/* count of egrfull errors, combined for all ports */
@@ -347,6 +384,7 @@
 	u32 ipath_lastrpkts;
 	/* pio bufs allocated per port */
 	u32 ipath_pbufsport;
+	u32 ipath_pioupd_thresh; /* update threshold, some chips */
 	/*
 	 * number of ports configured as max; zero is set to number chip
 	 * supports, less gives more pio bufs/port, etc.
@@ -365,6 +403,7 @@
 	 * get to multiple devices
 	 */
 	u32 ipath_lastpioindex;
+	u32 ipath_lastpioindexl;
 	/* max length of freezemsg */
 	u32 ipath_freezelen;
 	/*
@@ -381,6 +420,15 @@
 	u32 ipath_pcibar0;
 	/* so we can rewrite it after a chip reset */
 	u32 ipath_pcibar1;
+	u32 ipath_x1_fix_tries;
+	u32 ipath_autoneg_tries;
+	u32 serdes_first_init_done;
+
+	struct ipath_relock {
+		atomic_t ipath_relock_timer_active;
+		struct timer_list ipath_relock_timer;
+		unsigned int ipath_relock_interval; /* in jiffies */
+	} ipath_relock_singleton;
 
 	/* interrupt number */
 	int ipath_irq;
@@ -403,7 +451,7 @@
 	u64 __iomem *ipath_egrtidbase;
 	/* lock to workaround chip bug 9437 and others */
 	spinlock_t ipath_kernel_tid_lock;
-	spinlock_t ipath_tid_lock;
+	spinlock_t ipath_user_tid_lock;
 	spinlock_t ipath_sendctrl_lock;
 
 	/*
@@ -422,11 +470,48 @@
 	struct class_device *diag_class_dev;
 	/* timer used to prevent stats overflow, error throttling, etc. */
 	struct timer_list ipath_stats_timer;
+	/* timer to verify interrupts work, and fallback if possible */
+	struct timer_list ipath_intrchk_timer;
 	void *ipath_dummy_hdrq;	/* used after port close */
 	dma_addr_t ipath_dummy_hdrq_phys;
 
+	/* SendDMA related entries */
+	spinlock_t            ipath_sdma_lock;
+	u64                   ipath_sdma_status;
+	unsigned long         ipath_sdma_abort_jiffies;
+	unsigned long         ipath_sdma_abort_intr_timeout;
+	unsigned long         ipath_sdma_buf_jiffies;
+	struct ipath_sdma_desc *ipath_sdma_descq;
+	u64		      ipath_sdma_descq_added;
+	u64		      ipath_sdma_descq_removed;
+	int		      ipath_sdma_desc_nreserved;
+	u16                   ipath_sdma_descq_cnt;
+	u16                   ipath_sdma_descq_tail;
+	u16                   ipath_sdma_descq_head;
+	u16                   ipath_sdma_next_intr;
+	u16                   ipath_sdma_reset_wait;
+	u8                    ipath_sdma_generation;
+	struct tasklet_struct ipath_sdma_abort_task;
+	struct tasklet_struct ipath_sdma_notify_task;
+	struct list_head      ipath_sdma_activelist;
+	struct list_head      ipath_sdma_notifylist;
+	atomic_t              ipath_sdma_vl15_count;
+	struct timer_list     ipath_sdma_vl15_timer;
+
+	dma_addr_t       ipath_sdma_descq_phys;
+	volatile __le64 *ipath_sdma_head_dma;
+	dma_addr_t       ipath_sdma_head_phys;
+
 	unsigned long ipath_ureg_align; /* user register alignment */
 
+	struct delayed_work ipath_autoneg_work;
+	wait_queue_head_t ipath_autoneg_wait;
+
+	/* HoL blocking / user app forward-progress state */
+	unsigned          ipath_hol_state;
+	unsigned          ipath_hol_next;
+	struct timer_list ipath_hol_timer;
+
 	/*
 	 * Shadow copies of registers; size indicates read access size.
 	 * Most of them are readonly, but some are write-only register,
@@ -447,6 +532,8 @@
 	 * init time.
 	 */
 	unsigned long ipath_pioavailshadow[8];
+	/* bitmap of send buffers available for the kernel to use with PIO. */
+	unsigned long ipath_pioavailkernel[8];
 	/* shadow of kr_gpio_out, for rmw ops */
 	u64 ipath_gpio_out;
 	/* shadow the gpio mask register */
@@ -472,6 +559,8 @@
 	u64 ipath_intconfig;
 	/* kr_sendpiobufbase value */
 	u64 ipath_piobufbase;
+	/* kr_ibcddrctrl shadow */
+	u64 ipath_ibcddrctrl;
 
 	/* these are the "32 bit" regs */
 
@@ -488,7 +577,10 @@
 	unsigned long ipath_rcvctrl;
 	/* shadow kr_sendctrl */
 	unsigned long ipath_sendctrl;
-	unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
+	/* to not count armlaunch after cancel */
+	unsigned long ipath_lastcancel;
+	/* count cases where special trigger was needed (double write) */
+	unsigned long ipath_spectriggerhit;
 
 	/* value we put in kr_rcvhdrcnt */
 	u32 ipath_rcvhdrcnt;
@@ -510,6 +602,7 @@
 	u32 ipath_piobcnt4k;
 	/* size in bytes of "4KB" PIO buffers */
 	u32 ipath_piosize4k;
+	u32 ipath_pioreserved; /* reserved special-inkernel; */
 	/* kr_rcvegrbase value */
 	u32 ipath_rcvegrbase;
 	/* kr_rcvegrcnt value */
@@ -546,10 +639,10 @@
 	u32 ipath_init_ibmaxlen;
 	/* size of each rcvegrbuffer */
 	u32 ipath_rcvegrbufsize;
-	/* width (2,4,8,16,32) from HT config reg */
-	u32 ipath_htwidth;
-	/* HT speed (200,400,800,1000) from HT config */
-	u32 ipath_htspeed;
+	/* localbus width (1, 2,4,8,16,32) from config space  */
+	u32 ipath_lbus_width;
+	/* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
+	u32 ipath_lbus_speed;
 	/*
 	 * number of sequential ibcstatus change for polling active/quiet
 	 * (i.e., link not coming up).
@@ -573,21 +666,14 @@
 	 */
 	u8 ipath_serial[16];
 	/* human readable board version */
-	u8 ipath_boardversion[80];
+	u8 ipath_boardversion[96];
+	u8 ipath_lbus_info[32]; /* human readable localbus info */
 	/* chip major rev, from ipath_revision */
 	u8 ipath_majrev;
 	/* chip minor rev, from ipath_revision */
 	u8 ipath_minrev;
 	/* board rev, from ipath_revision */
 	u8 ipath_boardrev;
-
-	u8 ipath_r_portenable_shift;
-	u8 ipath_r_intravail_shift;
-	u8 ipath_r_tailupd_shift;
-	u8 ipath_r_portcfg_shift;
-
-	/* unit # of this chip, if present */
-	int ipath_unit;
 	/* saved for restore after reset */
 	u8 ipath_pci_cacheline;
 	/* LID mask control */
@@ -603,6 +689,14 @@
 	/* Rx Polarity inversion (compensate for ~tx on partner) */
 	u8 ipath_rx_pol_inv;
 
+	u8 ipath_r_portenable_shift;
+	u8 ipath_r_intravail_shift;
+	u8 ipath_r_tailupd_shift;
+	u8 ipath_r_portcfg_shift;
+
+	/* unit # of this chip, if present */
+	int ipath_unit;
+
 	/* local link integrity counter */
 	u32 ipath_lli_counter;
 	/* local link integrity errors */
@@ -617,9 +711,6 @@
 	u32 ipath_overrun_thresh_errs;
 	u32 ipath_lli_errs;
 
-	/* status check work */
-	struct delayed_work status_work;
-
 	/*
 	 * Not all devices managed by a driver instance are the same
 	 * type, so these fields must be per-device.
@@ -632,8 +723,8 @@
 	 * Below should be computable from number of ports,
 	 * since they are never modified.
 	 */
-	u32 ipath_i_rcvavail_mask;
-	u32 ipath_i_rcvurg_mask;
+	u64 ipath_i_rcvavail_mask;
+	u64 ipath_i_rcvurg_mask;
 	u16 ipath_i_rcvurg_shift;
 	u16 ipath_i_rcvavail_shift;
 
@@ -641,8 +732,9 @@
 	 * Register bits for selecting i2c direction and values, used for
 	 * I2C serial flash.
 	 */
-	u16 ipath_gpio_sda_num;
-	u16 ipath_gpio_scl_num;
+	u8 ipath_gpio_sda_num;
+	u8 ipath_gpio_scl_num;
+	u8 ipath_i2c_chain_type;
 	u64 ipath_gpio_sda;
 	u64 ipath_gpio_scl;
 
@@ -703,13 +795,51 @@
 	/* interrupt mitigation reload register info */
 	u16 ipath_jint_idle_ticks;	/* idle clock ticks */
 	u16 ipath_jint_max_packets;	/* max packets across all ports */
+
+	/*
+	 * lock for access to SerDes, and flags to sequence preset
+	 * versus steady-state. 7220-only at the moment.
+	 */
+	spinlock_t ipath_sdepb_lock;
+	u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
 };
 
+/* ipath_hol_state values (stopping/starting user proc, send flushing) */
+#define IPATH_HOL_UP       0
+#define IPATH_HOL_DOWN     1
+/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
+#define IPATH_HOL_DOWNSTOP 0
+#define IPATH_HOL_DOWNCONT 1
+
+/* bit positions for sdma_status */
+#define IPATH_SDMA_ABORTING  0
+#define IPATH_SDMA_DISARMED  1
+#define IPATH_SDMA_DISABLED  2
+#define IPATH_SDMA_LAYERBUF  3
+#define IPATH_SDMA_RUNNING  62
+#define IPATH_SDMA_SHUTDOWN 63
+
+/* bit combinations that correspond to abort states */
+#define IPATH_SDMA_ABORT_NONE 0
+#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
+#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
+	(1UL << IPATH_SDMA_DISARMED))
+#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
+	(1UL << IPATH_SDMA_DISABLED))
+#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
+	(1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
+#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
+	(1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
+
+#define IPATH_SDMA_BUF_NONE 0
+#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
+
 /* Private data for file operations */
 struct ipath_filedata {
 	struct ipath_portdata *pd;
 	unsigned subport;
 	unsigned tidcursor;
+	struct ipath_user_sdma_queue *pq;
 };
 extern struct list_head ipath_dev_list;
 extern spinlock_t ipath_devs_lock;
@@ -718,7 +848,7 @@
 int ipath_init_chip(struct ipath_devdata *, int);
 int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
-int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
+int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
 void ipath_shutdown_device(struct ipath_devdata *);
 void ipath_clear_freeze(struct ipath_devdata *);
 
@@ -741,7 +871,8 @@
 extern int ipath_diag_inuse;
 
 irqreturn_t ipath_intr(int irq, void *devid);
-int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
+		     ipath_err_t err);
 #if __IPATH_INFO || __IPATH_DBG
 extern const char *ipath_ibcstatus_str[];
 #endif
@@ -774,6 +905,13 @@
 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 void ipath_enable_armlaunch(struct ipath_devdata *);
 void ipath_disable_armlaunch(struct ipath_devdata *);
+void ipath_hol_down(struct ipath_devdata *);
+void ipath_hol_up(struct ipath_devdata *);
+void ipath_hol_event(unsigned long);
+void ipath_toggle_rclkrls(struct ipath_devdata *);
+void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
+void ipath_set_relock_poll(struct ipath_devdata *, int);
+void ipath_shutdown_relock_poll(struct ipath_devdata *);
 
 /* for use in system calls, where we want to know device type, etc. */
 #define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
@@ -781,11 +919,15 @@
 	((struct ipath_filedata *)(fp)->private_data)->subport
 #define tidcursor_fp(fp) \
 	((struct ipath_filedata *)(fp)->private_data)->tidcursor
+#define user_sdma_queue_fp(fp) \
+	((struct ipath_filedata *)(fp)->private_data)->pq
 
 /*
  * values for ipath_flags
  */
-/* The chip is up and initted */
+		/* chip can report link latency (IB 1.2) */
+#define IPATH_HAS_LINK_LATENCY 0x1
+		/* The chip is up and initted */
 #define IPATH_INITTED       0x2
 		/* set if any user code has set kr_rcvhdrsize */
 #define IPATH_RCVHDRSZ_SET  0x4
@@ -809,6 +951,8 @@
 #define IPATH_LINKUNK       0x400
 		/* Write combining flush needed for PIO */
 #define IPATH_PIO_FLUSH_WC  0x1000
+		/* DMA Receive tail pointer */
+#define IPATH_NODMA_RTAIL   0x2000
 		/* no IB cable, or no device on IB cable */
 #define IPATH_NOCABLE       0x4000
 		/* Supports port zero per packet receive interrupts via
@@ -819,16 +963,26 @@
 		/* packet/word counters are 32 bit, else those 4 counters
 		 * are 64bit */
 #define IPATH_32BITCOUNTERS 0x20000
-		/* can miss port0 rx interrupts */
 		/* Interrupt register is 64 bits */
 #define IPATH_INTREG_64     0x40000
+		/* can miss port0 rx interrupts */
 #define IPATH_DISABLED      0x80000 /* administratively disabled */
 		/* Use GPIO interrupts for new counters */
 #define IPATH_GPIO_ERRINTRS 0x100000
 #define IPATH_SWAP_PIOBUFS  0x200000
+		/* Supports Send DMA */
+#define IPATH_HAS_SEND_DMA  0x400000
+		/* Supports Send Count (not just word count) in PBC */
+#define IPATH_HAS_PBC_CNT   0x800000
 		/* Suppress heartbeat, even if turning off loopback */
 #define IPATH_NO_HRTBT      0x1000000
+#define IPATH_HAS_THRESH_UPDATE 0x4000000
 #define IPATH_HAS_MULT_IB_SPEED 0x8000000
+#define IPATH_IB_AUTONEG_INPROG 0x10000000
+#define IPATH_IB_AUTONEG_FAILED 0x20000000
+		/* Linkdown-disable intentionally, Do not attempt to bring up */
+#define IPATH_IB_LINK_DISABLED 0x40000000
+#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
 
 /* Bits in GPIO for the added interrupts */
 #define IPATH_GPIO_PORT0_BIT 2
@@ -847,13 +1001,18 @@
 
 /* free up any allocated data at closes */
 void ipath_free_data(struct ipath_portdata *dd);
-u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
+u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
+void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
+				unsigned len, int avail);
+void ipath_init_iba7220_funcs(struct ipath_devdata *);
 void ipath_init_iba6120_funcs(struct ipath_devdata *);
 void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
 int ipath_update_eeprom_log(struct ipath_devdata *dd);
 void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
+void ipath_force_pio_avail_update(struct ipath_devdata *);
 void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
 
 /*
@@ -865,6 +1024,34 @@
 #define IPATH_LED_LOG 2  /* Logical (link) YELLOW LED */
 void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
 
+/* send dma routines */
+int setup_sdma(struct ipath_devdata *);
+void teardown_sdma(struct ipath_devdata *);
+void ipath_restart_sdma(struct ipath_devdata *);
+void ipath_sdma_intr(struct ipath_devdata *);
+int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
+			  u32, struct ipath_verbs_txreq *);
+/* ipath_sdma_lock should be locked before calling this. */
+int ipath_sdma_make_progress(struct ipath_devdata *dd);
+
+/* must be called under ipath_sdma_lock */
+static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
+{
+	return dd->ipath_sdma_descq_cnt -
+		(dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
+		1 - dd->ipath_sdma_desc_nreserved;
+}
+
+static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
+{
+	dd->ipath_sdma_desc_nreserved += cnt;
+}
+
+static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
+{
+	dd->ipath_sdma_desc_nreserved -= cnt;
+}
+
 /*
  * number of words used for protocol header if not set by ipath_userinit();
  */
@@ -875,6 +1062,8 @@
 void ipath_release_user_pages_on_close(struct page **, size_t);
 int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
 int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
+int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
+int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
 
 /* these are used for the registers that vary with port */
 void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
@@ -891,8 +1080,7 @@
 
 /*
  * At the moment, none of the s-registers are writable, so no
- * ipath_write_sreg(), and none of the c-registers are writable, so no
- * ipath_write_creg().
+ * ipath_write_sreg().
  */
 
 /**
@@ -1001,6 +1189,27 @@
 				pd->port_rcvhdrtail_kvaddr));
 }
 
+static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
+{
+	const struct ipath_devdata *dd = pd->port_dd;
+	u32 hdrqtail;
+
+	if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
+		__le32 *rhf_addr;
+		u32 seq;
+
+		rhf_addr = (__le32 *) pd->port_rcvhdrq +
+			pd->port_head + dd->ipath_rhf_offset;
+		seq = ipath_hdrget_seq(rhf_addr);
+		hdrqtail = pd->port_head;
+		if (seq == pd->port_seq_cnt)
+			hdrqtail++;
+	} else
+		hdrqtail = ipath_get_rcvhdrtail(pd);
+
+	return hdrqtail;
+}
+
 static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
 {
 	return (dd->ipath_flags & IPATH_INTREG_64) ?
@@ -1029,6 +1238,21 @@
 }
 
 /*
+ * from contents of IBCStatus (or a saved copy), return logical link state
+ * combination of link state and linktraining state (down, active, init,
+ * arm, etc.
+ */
+static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
+{
+	u32 ibs;
+	ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+		dd->ibcs_lts_mask;
+	ibs |= (u32)(ibcs &
+		(INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
+	return ibs;
+}
+
+/*
  * sysfs interface.
  */
 
@@ -1053,6 +1277,7 @@
 dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
 			  size_t, int);
 dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
+const char *ipath_get_unit_name(int unit);
 
 /*
  * Flush write combining store buffers (if present) and perform a write
@@ -1065,11 +1290,8 @@
 #endif
 
 extern unsigned ipath_debug; /* debugging bit mask */
-
-#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
-
-const char *ipath_get_unit_name(int unit);
-
+extern unsigned ipath_linkrecovery;
+extern unsigned ipath_mtu4096;
 extern struct mutex ipath_mutex;
 
 #define IPATH_DRV_NAME		"ib_ipath"
@@ -1096,7 +1318,7 @@
 
 # define __IPATH_DBG_WHICH(which,fmt,...) \
 	do { \
-		if(unlikely(ipath_debug&(which))) \
+		if (unlikely(ipath_debug & (which))) \
 			printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
 			       __func__,##__VA_ARGS__); \
 	} while(0)
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index b34b91d..1ff46ae 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -146,6 +146,15 @@
 	return reply(smp);
 }
 
+static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
+{
+	(void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
+}
+
+static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
+{
+	(void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
+}
 
 static int get_overrunthreshold(struct ipath_devdata *dd)
 {
@@ -226,6 +235,7 @@
 				  struct ib_device *ibdev, u8 port)
 {
 	struct ipath_ibdev *dev;
+	struct ipath_devdata *dd;
 	struct ib_port_info *pip = (struct ib_port_info *)smp->data;
 	u16 lid;
 	u8 ibcstat;
@@ -239,6 +249,7 @@
 	}
 
 	dev = to_idev(ibdev);
+	dd = dev->dd;
 
 	/* Clear all fields.  Only set the non-zero fields. */
 	memset(smp->data, 0, sizeof(smp->data));
@@ -248,25 +259,28 @@
 	    dev->mkeyprot == 0)
 		pip->mkey = dev->mkey;
 	pip->gid_prefix = dev->gid_prefix;
-	lid = dev->dd->ipath_lid;
+	lid = dd->ipath_lid;
 	pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
 	pip->sm_lid = cpu_to_be16(dev->sm_lid);
 	pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
 	/* pip->diag_code; */
 	pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
 	pip->local_port_num = port;
-	pip->link_width_enabled = dev->link_width_enabled;
-	pip->link_width_supported = 3;	/* 1x or 4x */
-	pip->link_width_active = 2;	/* 4x */
-	pip->linkspeed_portstate = 0x10;	/* 2.5Gbps */
-	ibcstat = dev->dd->ipath_lastibcstat;
-	pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
+	pip->link_width_enabled = dd->ipath_link_width_enabled;
+	pip->link_width_supported = dd->ipath_link_width_supported;
+	pip->link_width_active = dd->ipath_link_width_active;
+	pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
+	ibcstat = dd->ipath_lastibcstat;
+	/* map LinkState to IB portinfo values.  */
+	pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
+
 	pip->portphysstate_linkdown =
-		(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
-		(get_linkdowndefaultstate(dev->dd) ? 1 : 2);
-	pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dev->dd->ipath_lmc;
-	pip->linkspeedactive_enabled = 0x11;	/* 2.5Gbps, 2.5Gbps */
-	switch (dev->dd->ipath_ibmtu) {
+		(ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
+		(get_linkdowndefaultstate(dd) ? 1 : 2);
+	pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
+	pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
+		dd->ipath_link_speed_enabled;
+	switch (dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
 		break;
@@ -292,19 +306,15 @@
 	/* pip->vl_arb_high_cap; // only one VL */
 	/* pip->vl_arb_low_cap; // only one VL */
 	/* InitTypeReply = 0 */
-	/*
-	 * Note: the chips support a maximum MTU of 4096, but the driver
-	 * hasn't implemented this feature yet, so set the maximum value
-	 * to 2048.
-	 */
-	pip->inittypereply_mtucap = IB_MTU_2048;
-	// HCAs ignore VLStallCount and HOQLife
+	/* our mtu cap depends on whether 4K MTU enabled or not */
+	pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
+	/* HCAs ignore VLStallCount and HOQLife */
 	/* pip->vlstallcnt_hoqlife; */
 	pip->operationalvl_pei_peo_fpi_fpo = 0x10;	/* OVLs = 1 */
 	pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
 	pip->pkey_violations =
-		cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
+		cpu_to_be16((ipath_get_cr_errpkey(dd) -
 			     dev->z_pkey_violations) & 0xFFFF);
 	pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
 	/* Only the hardware GUID is supported for now */
@@ -313,10 +323,17 @@
 	/* 32.768 usec. response time (guessing) */
 	pip->resv_resptimevalue = 3;
 	pip->localphyerrors_overrunerrors =
-		(get_phyerrthreshold(dev->dd) << 4) |
-		get_overrunthreshold(dev->dd);
+		(get_phyerrthreshold(dd) << 4) |
+		get_overrunthreshold(dd);
 	/* pip->max_credit_hint; */
-	/* pip->link_roundtrip_latency[3]; */
+	if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
+		u32 v;
+
+		v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
+		pip->link_roundtrip_latency[0] = v >> 16;
+		pip->link_roundtrip_latency[1] = v >> 8;
+		pip->link_roundtrip_latency[2] = v;
+	}
 
 	ret = reply(smp);
 
@@ -444,19 +461,25 @@
 		ib_dispatch_event(&event);
 	}
 
-	/* Only 4x supported but allow 1x or 4x to be set (see 14.2.6.6). */
+	/* Allow 1x or 4x to be set (see 14.2.6.6). */
 	lwe = pip->link_width_enabled;
-	if ((lwe >= 4 && lwe <= 8) || (lwe >= 0xC && lwe <= 0xFE))
-		goto err;
-	if (lwe == 0xFF)
-		dev->link_width_enabled = 3;	/* 1x or 4x */
-	else if (lwe)
-		dev->link_width_enabled = lwe;
+	if (lwe) {
+		if (lwe == 0xFF)
+			lwe = dd->ipath_link_width_supported;
+		else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
+			goto err;
+		set_link_width_enabled(dd, lwe);
+	}
 
-	/* Only 2.5 Gbs supported. */
+	/* Allow 2.5 or 5.0 Gbs. */
 	lse = pip->linkspeedactive_enabled & 0xF;
-	if (lse >= 2 && lse <= 0xE)
-		goto err;
+	if (lse) {
+		if (lse == 15)
+			lse = dd->ipath_link_speed_supported;
+		else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
+			goto err;
+		set_link_speed_enabled(dd, lse);
+	}
 
 	/* Set link down default state. */
 	switch (pip->portphysstate_linkdown & 0xF) {
@@ -491,6 +514,8 @@
 		mtu = 2048;
 		break;
 	case IB_MTU_4096:
+		if (!ipath_mtu4096)
+			goto err;
 		mtu = 4096;
 		break;
 	default:
@@ -565,6 +590,10 @@
 		else
 			goto err;
 		ipath_set_linkstate(dd, lstate);
+		if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
+			ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+			goto done;
+		}
 		ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
 				IPATH_LINKACTIVE, 1000);
 		break;
@@ -948,10 +977,14 @@
 	 * nsec.  0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec.  Sample
 	 * intervals are counted in ticks.  Since we use Linux timers, that
 	 * count in jiffies, we can't sample for less than 1000 ticks if HZ
-	 * == 1000 (4000 ticks if HZ is 250).
+	 * == 1000 (4000 ticks if HZ is 250).  link_speed_active returns 2 for
+	 * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that
+	 * have hardware support for delaying packets.
 	 */
-	/* XXX This is WRONG. */
-	p->tick = 250;		/* 1 usec. */
+	if (crp->cr_psstat)
+		p->tick = dev->dd->ipath_link_speed_active - 1;
+	else
+		p->tick = 250;		/* 1 usec. */
 	p->counter_width = 4;	/* 32 bit counters */
 	p->counter_mask0_9 = COUNTER_MASK0_9;
 	spin_lock_irqsave(&dev->pending_lock, flags);
@@ -1364,7 +1397,8 @@
 	}
 
 	/* Is the mkey in the process of expiring? */
-	if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) {
+	if (dev->mkey_lease_timeout &&
+	    time_after_eq(jiffies, dev->mkey_lease_timeout)) {
 		/* Clear timeout and mkey protection field. */
 		dev->mkey_lease_timeout = 0;
 		dev->mkeyprot = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 087ed31..dd5b6e9 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -340,6 +340,7 @@
 	qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
 	qp->s_hdrwords = 0;
 	qp->s_wqe = NULL;
+	qp->s_pkt_delay = 0;
 	qp->s_psn = 0;
 	qp->r_psn = 0;
 	qp->r_msn = 0;
@@ -392,7 +393,6 @@
 		  qp->ibqp.qp_num, qp->remote_qpn, err);
 
 	spin_lock(&dev->pending_lock);
-	/* XXX What if its already removed by the timeout code? */
 	if (!list_empty(&qp->timerwait))
 		list_del_init(&qp->timerwait);
 	if (!list_empty(&qp->piowait))
@@ -516,13 +516,13 @@
 			goto inval;
 
 	/*
-	 * Note: the chips support a maximum MTU of 4096, but the driver
-	 * hasn't implemented this feature yet, so don't allow Path MTU
-	 * values greater than 2048.
+	 * don't allow invalid Path MTU values or greater than 2048
+	 * unless we are configured for a 4KB MTU
 	 */
-	if (attr_mask & IB_QP_PATH_MTU)
-		if (attr->path_mtu > IB_MTU_2048)
-			goto inval;
+	if ((attr_mask & IB_QP_PATH_MTU) &&
+		(ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
+		(attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
+		goto inval;
 
 	if (attr_mask & IB_QP_PATH_MIG_STATE)
 		if (attr->path_mig_state != IB_MIG_MIGRATED &&
@@ -564,8 +564,10 @@
 	if (attr_mask & IB_QP_ACCESS_FLAGS)
 		qp->qp_access_flags = attr->qp_access_flags;
 
-	if (attr_mask & IB_QP_AV)
+	if (attr_mask & IB_QP_AV) {
 		qp->remote_ah_attr = attr->ah_attr;
+		qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
+	}
 
 	if (attr_mask & IB_QP_PATH_MTU)
 		qp->path_mtu = attr->path_mtu;
@@ -748,22 +750,33 @@
 	size_t sz;
 	struct ib_qp *ret;
 
-	if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
-	    init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
-	    init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs ||
-	    init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
-		ret = ERR_PTR(-ENOMEM);
+	if (init_attr->create_flags) {
+		ret = ERR_PTR(-EINVAL);
 		goto bail;
 	}
 
-	if (init_attr->cap.max_send_sge +
-	    init_attr->cap.max_recv_sge +
-	    init_attr->cap.max_send_wr +
-	    init_attr->cap.max_recv_wr == 0) {
+	if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
+	    init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
 		ret = ERR_PTR(-EINVAL);
 		goto bail;
 	}
 
+	/* Check receive queue parameters if no SRQ is specified. */
+	if (!init_attr->srq) {
+		if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
+		    init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
+			ret = ERR_PTR(-EINVAL);
+			goto bail;
+		}
+		if (init_attr->cap.max_send_sge +
+		    init_attr->cap.max_send_wr +
+		    init_attr->cap.max_recv_sge +
+		    init_attr->cap.max_recv_wr == 0) {
+			ret = ERR_PTR(-EINVAL);
+			goto bail;
+		}
+	}
+
 	switch (init_attr->qp_type) {
 	case IB_QPT_UC:
 	case IB_QPT_RC:
@@ -840,6 +853,7 @@
 			goto bail_qp;
 		}
 		qp->ip = NULL;
+		qp->s_tx = NULL;
 		ipath_reset_qp(qp, init_attr->qp_type);
 		break;
 
@@ -945,12 +959,20 @@
 	/* Stop the sending tasklet. */
 	tasklet_kill(&qp->s_task);
 
+	if (qp->s_tx) {
+		atomic_dec(&qp->refcount);
+		if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
+			kfree(qp->s_tx->txreq.map_addr);
+	}
+
 	/* Make sure the QP isn't on the timeout list. */
 	spin_lock_irqsave(&dev->pending_lock, flags);
 	if (!list_empty(&qp->timerwait))
 		list_del_init(&qp->timerwait);
 	if (!list_empty(&qp->piowait))
 		list_del_init(&qp->piowait);
+	if (qp->s_tx)
+		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 
 	/*
@@ -1021,7 +1043,6 @@
 		  qp->ibqp.qp_num, qp->remote_qpn, wc->status);
 
 	spin_lock(&dev->pending_lock);
-	/* XXX What if its already removed by the timeout code? */
 	if (!list_empty(&qp->timerwait))
 		list_del_init(&qp->timerwait);
 	if (!list_empty(&qp->piowait))
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 40f3e37..c405dfb 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -31,6 +31,8 @@
  * SOFTWARE.
  */
 
+#include <linux/io.h>
+
 #include "ipath_verbs.h"
 #include "ipath_kernel.h"
 
@@ -306,7 +308,7 @@
 			else {
 				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the BTH */
-				ohdr->u.imm_data = wqe->wr.imm_data;
+				ohdr->u.imm_data = wqe->wr.ex.imm_data;
 				hwords += 1;
 			}
 			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -344,7 +346,7 @@
 				qp->s_state =
 					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after RETH */
-				ohdr->u.rc.imm_data = wqe->wr.imm_data;
+				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
 				hwords += 1;
 				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 					bth0 |= 1 << 23;
@@ -488,7 +490,7 @@
 		else {
 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.imm_data;
+			ohdr->u.imm_data = wqe->wr.ex.imm_data;
 			hwords += 1;
 		}
 		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -524,7 +526,7 @@
 		else {
 			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.imm_data;
+			ohdr->u.imm_data = wqe->wr.ex.imm_data;
 			hwords += 1;
 			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 				bth0 |= 1 << 23;
@@ -585,19 +587,39 @@
 static void send_rc_ack(struct ipath_qp *qp)
 {
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	struct ipath_devdata *dd;
 	u16 lrh0;
 	u32 bth0;
 	u32 hwords;
+	u32 __iomem *piobuf;
 	struct ipath_ib_header hdr;
 	struct ipath_other_headers *ohdr;
 	unsigned long flags;
 
+	spin_lock_irqsave(&qp->s_lock, flags);
+
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
 	if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
 	    (qp->s_flags & IPATH_S_ACK_PENDING) ||
 	    qp->s_ack_state != OP(ACKNOWLEDGE))
 		goto queue_ack;
 
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
+	dd = dev->dd;
+	piobuf = ipath_getpiobuf(dd, 0, NULL);
+	if (!piobuf) {
+		/*
+		 * We are out of PIO buffers at the moment.
+		 * Pass responsibility for sending the ACK to the
+		 * send tasklet so that when a PIO buffer becomes
+		 * available, the ACK is sent ahead of other outgoing
+		 * packets.
+		 */
+		spin_lock_irqsave(&qp->s_lock, flags);
+		goto queue_ack;
+	}
+
 	/* Construct the header. */
 	ohdr = &hdr.u.oth;
 	lrh0 = IPATH_LRH_BTH;
@@ -611,7 +633,7 @@
 		lrh0 = IPATH_LRH_GRH;
 	}
 	/* read pkey_index w/o lock (its atomic) */
-	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
+	bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
 		(OP(ACKNOWLEDGE) << 24) | (1 << 22);
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
@@ -623,30 +645,29 @@
 	hdr.lrh[0] = cpu_to_be16(lrh0);
 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
-	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+	hdr.lrh[3] = cpu_to_be16(dd->ipath_lid);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
 	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
 
-	/*
-	 * If we can send the ACK, clear the ACK state.
-	 */
-	if (ipath_verbs_send(qp, &hdr, hwords, NULL, 0) == 0) {
-		dev->n_unicast_xmit++;
-		goto done;
-	}
+	writeq(hwords + 1, piobuf);
 
-	/*
-	 * We are out of PIO buffers at the moment.
-	 * Pass responsibility for sending the ACK to the
-	 * send tasklet so that when a PIO buffer becomes
-	 * available, the ACK is sent ahead of other outgoing
-	 * packets.
-	 */
-	dev->n_rc_qacks++;
+	if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
+		u32 *hdrp = (u32 *) &hdr;
+
+		ipath_flush_wc();
+		__iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
+		ipath_flush_wc();
+		__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
+	} else
+		__iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
+
+	ipath_flush_wc();
+
+	dev->n_unicast_xmit++;
+	goto done;
 
 queue_ack:
-	spin_lock_irqsave(&qp->s_lock, flags);
 	dev->n_rc_qacks++;
 	qp->s_flags |= IPATH_S_ACK_PENDING;
 	qp->s_nak_state = qp->r_nak_state;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 92ad73a..8f44d0c 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -63,67 +63,92 @@
 /* kr_control bits */
 #define INFINIPATH_C_FREEZEMODE 0x00000002
 #define INFINIPATH_C_LINKENABLE 0x00000004
-#define INFINIPATH_C_RESET 0x00000001
 
 /* kr_sendctrl bits */
 #define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
+#define INFINIPATH_S_UPDTHRESH_SHIFT 24
+#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
 
 #define IPATH_S_ABORT		0
 #define IPATH_S_PIOINTBUFAVAIL	1
 #define IPATH_S_PIOBUFAVAILUPD	2
 #define IPATH_S_PIOENABLE	3
+#define IPATH_S_SDMAINTENABLE	9
+#define IPATH_S_SDMASINGLEDESCRIPTOR	10
+#define IPATH_S_SDMAENABLE	11
+#define IPATH_S_SDMAHALT	12
 #define IPATH_S_DISARM		31
 
 #define INFINIPATH_S_ABORT		(1U << IPATH_S_ABORT)
 #define INFINIPATH_S_PIOINTBUFAVAIL	(1U << IPATH_S_PIOINTBUFAVAIL)
 #define INFINIPATH_S_PIOBUFAVAILUPD	(1U << IPATH_S_PIOBUFAVAILUPD)
 #define INFINIPATH_S_PIOENABLE		(1U << IPATH_S_PIOENABLE)
+#define INFINIPATH_S_SDMAINTENABLE	(1U << IPATH_S_SDMAINTENABLE)
+#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
+					(1U << IPATH_S_SDMASINGLEDESCRIPTOR)
+#define INFINIPATH_S_SDMAENABLE		(1U << IPATH_S_SDMAENABLE)
+#define INFINIPATH_S_SDMAHALT		(1U << IPATH_S_SDMAHALT)
 #define INFINIPATH_S_DISARM		(1U << IPATH_S_DISARM)
 
-/* kr_rcvctrl bits */
+/* kr_rcvctrl bits that are the same on multiple chips */
 #define INFINIPATH_R_PORTENABLE_SHIFT 0
 #define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
 
 /* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_SHIFT 0
-#define INFINIPATH_I_RCVAVAIL_SHIFT 12
-#define INFINIPATH_I_ERROR        0x80000000
-#define INFINIPATH_I_SPIOSENT     0x40000000
-#define INFINIPATH_I_SPIOBUFAVAIL 0x20000000
-#define INFINIPATH_I_GPIO         0x10000000
+#define INFINIPATH_I_SDMAINT		0x8000000000000000ULL
+#define INFINIPATH_I_SDMADISABLED	0x4000000000000000ULL
+#define INFINIPATH_I_ERROR		0x0000000080000000ULL
+#define INFINIPATH_I_SPIOSENT		0x0000000040000000ULL
+#define INFINIPATH_I_SPIOBUFAVAIL	0x0000000020000000ULL
+#define INFINIPATH_I_GPIO		0x0000000010000000ULL
+#define INFINIPATH_I_JINT		0x0000000004000000ULL
 
 /* kr_errorstatus, kr_errorclear, kr_errormask bits */
-#define INFINIPATH_E_RFORMATERR      0x0000000000000001ULL
-#define INFINIPATH_E_RVCRC           0x0000000000000002ULL
-#define INFINIPATH_E_RICRC           0x0000000000000004ULL
-#define INFINIPATH_E_RMINPKTLEN      0x0000000000000008ULL
-#define INFINIPATH_E_RMAXPKTLEN      0x0000000000000010ULL
-#define INFINIPATH_E_RLONGPKTLEN     0x0000000000000020ULL
-#define INFINIPATH_E_RSHORTPKTLEN    0x0000000000000040ULL
-#define INFINIPATH_E_RUNEXPCHAR      0x0000000000000080ULL
-#define INFINIPATH_E_RUNSUPVL        0x0000000000000100ULL
-#define INFINIPATH_E_REBP            0x0000000000000200ULL
-#define INFINIPATH_E_RIBFLOW         0x0000000000000400ULL
-#define INFINIPATH_E_RBADVERSION     0x0000000000000800ULL
-#define INFINIPATH_E_RRCVEGRFULL     0x0000000000001000ULL
-#define INFINIPATH_E_RRCVHDRFULL     0x0000000000002000ULL
-#define INFINIPATH_E_RBADTID         0x0000000000004000ULL
-#define INFINIPATH_E_RHDRLEN         0x0000000000008000ULL
-#define INFINIPATH_E_RHDR            0x0000000000010000ULL
-#define INFINIPATH_E_RIBLOSTLINK     0x0000000000020000ULL
-#define INFINIPATH_E_SMINPKTLEN      0x0000000020000000ULL
-#define INFINIPATH_E_SMAXPKTLEN      0x0000000040000000ULL
-#define INFINIPATH_E_SUNDERRUN       0x0000000080000000ULL
-#define INFINIPATH_E_SPKTLEN         0x0000000100000000ULL
-#define INFINIPATH_E_SDROPPEDSMPPKT  0x0000000200000000ULL
-#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL
-#define INFINIPATH_E_SPIOARMLAUNCH   0x0000000800000000ULL
-#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL
-#define INFINIPATH_E_SUNSUPVL        0x0000002000000000ULL
-#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL
-#define INFINIPATH_E_INVALIDADDR     0x0002000000000000ULL
-#define INFINIPATH_E_RESET           0x0004000000000000ULL
-#define INFINIPATH_E_HARDWARE        0x0008000000000000ULL
+#define INFINIPATH_E_RFORMATERR			0x0000000000000001ULL
+#define INFINIPATH_E_RVCRC			0x0000000000000002ULL
+#define INFINIPATH_E_RICRC			0x0000000000000004ULL
+#define INFINIPATH_E_RMINPKTLEN			0x0000000000000008ULL
+#define INFINIPATH_E_RMAXPKTLEN			0x0000000000000010ULL
+#define INFINIPATH_E_RLONGPKTLEN		0x0000000000000020ULL
+#define INFINIPATH_E_RSHORTPKTLEN		0x0000000000000040ULL
+#define INFINIPATH_E_RUNEXPCHAR			0x0000000000000080ULL
+#define INFINIPATH_E_RUNSUPVL			0x0000000000000100ULL
+#define INFINIPATH_E_REBP			0x0000000000000200ULL
+#define INFINIPATH_E_RIBFLOW			0x0000000000000400ULL
+#define INFINIPATH_E_RBADVERSION		0x0000000000000800ULL
+#define INFINIPATH_E_RRCVEGRFULL		0x0000000000001000ULL
+#define INFINIPATH_E_RRCVHDRFULL		0x0000000000002000ULL
+#define INFINIPATH_E_RBADTID			0x0000000000004000ULL
+#define INFINIPATH_E_RHDRLEN			0x0000000000008000ULL
+#define INFINIPATH_E_RHDR			0x0000000000010000ULL
+#define INFINIPATH_E_RIBLOSTLINK		0x0000000000020000ULL
+#define INFINIPATH_E_SENDSPECIALTRIGGER		0x0000000008000000ULL
+#define INFINIPATH_E_SDMADISABLED		0x0000000010000000ULL
+#define INFINIPATH_E_SMINPKTLEN			0x0000000020000000ULL
+#define INFINIPATH_E_SMAXPKTLEN			0x0000000040000000ULL
+#define INFINIPATH_E_SUNDERRUN			0x0000000080000000ULL
+#define INFINIPATH_E_SPKTLEN			0x0000000100000000ULL
+#define INFINIPATH_E_SDROPPEDSMPPKT		0x0000000200000000ULL
+#define INFINIPATH_E_SDROPPEDDATAPKT		0x0000000400000000ULL
+#define INFINIPATH_E_SPIOARMLAUNCH		0x0000000800000000ULL
+#define INFINIPATH_E_SUNEXPERRPKTNUM		0x0000001000000000ULL
+#define INFINIPATH_E_SUNSUPVL			0x0000002000000000ULL
+#define INFINIPATH_E_SENDBUFMISUSE		0x0000004000000000ULL
+#define INFINIPATH_E_SDMAGENMISMATCH		0x0000008000000000ULL
+#define INFINIPATH_E_SDMAOUTOFBOUND		0x0000010000000000ULL
+#define INFINIPATH_E_SDMATAILOUTOFBOUND		0x0000020000000000ULL
+#define INFINIPATH_E_SDMABASE			0x0000040000000000ULL
+#define INFINIPATH_E_SDMA1STDESC		0x0000080000000000ULL
+#define INFINIPATH_E_SDMARPYTAG			0x0000100000000000ULL
+#define INFINIPATH_E_SDMADWEN			0x0000200000000000ULL
+#define INFINIPATH_E_SDMAMISSINGDW		0x0000400000000000ULL
+#define INFINIPATH_E_SDMAUNEXPDATA		0x0000800000000000ULL
+#define INFINIPATH_E_IBSTATUSCHANGED		0x0001000000000000ULL
+#define INFINIPATH_E_INVALIDADDR		0x0002000000000000ULL
+#define INFINIPATH_E_RESET			0x0004000000000000ULL
+#define INFINIPATH_E_HARDWARE			0x0008000000000000ULL
+#define INFINIPATH_E_SDMADESCADDRMISALIGN	0x0010000000000000ULL
+#define INFINIPATH_E_INVALIDEEPCMD		0x0020000000000000ULL
 
 /*
  * this is used to print "common" packet errors only when the
@@ -134,6 +159,17 @@
 		| INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
 		| INFINIPATH_E_REBP )
 
+/* Convenience for decoding Send DMA errors */
+#define INFINIPATH_E_SDMAERRS ( \
+	INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
+	INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
+	INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
+	INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
+	INFINIPATH_E_SDMAUNEXPDATA | \
+	INFINIPATH_E_SDMADESCADDRMISALIGN | \
+	INFINIPATH_E_SDMADISABLED | \
+	INFINIPATH_E_SENDBUFMISUSE)
+
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
  * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2:  expTID, 3: eagerTID
@@ -158,7 +194,7 @@
 #define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
 /* waldo specific -- find the rest in ipath_6110.c */
 #define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
-/* monty specific -- find the rest in ipath_6120.c */
+/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
 #define INFINIPATH_HWE_MEMBISTFAILED	0x0040000000000000ULL
 
 /* kr_hwdiagctrl bits */
@@ -185,8 +221,8 @@
 #define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
 #define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
 #define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
-#define INFINIPATH_IBCC_LINKCMD_DOWN 1	/* move to 0x11 */
-#define INFINIPATH_IBCC_LINKCMD_ARMED 2	/* move to 0x21 */
+#define INFINIPATH_IBCC_LINKCMD_DOWN 1		/* move to 0x11 */
+#define INFINIPATH_IBCC_LINKCMD_ARMED 2		/* move to 0x21 */
 #define INFINIPATH_IBCC_LINKCMD_ACTIVE 3	/* move to 0x31 */
 #define INFINIPATH_IBCC_LINKCMD_SHIFT 18
 #define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
@@ -201,10 +237,9 @@
 #define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
 
 /* kr_ibcstatus bits */
-#define INFINIPATH_IBCS_LINKTRAININGSTATE_MASK 0xF
 #define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
 #define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
-#define INFINIPATH_IBCS_LINKSTATE_SHIFT 4
+
 #define INFINIPATH_IBCS_TXREADY       0x40000000
 #define INFINIPATH_IBCS_TXCREDITOK    0x80000000
 /* link training states (shift by
@@ -222,30 +257,13 @@
 #define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN	0x0c
 #define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT	0x0e
 #define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE	0x0f
-/* link state machine states (shift by INFINIPATH_IBCS_LINKSTATE_SHIFT) */
+/* link state machine states (shift by ibcs_ls_shift) */
 #define INFINIPATH_IBCS_L_STATE_DOWN		0x0
 #define INFINIPATH_IBCS_L_STATE_INIT		0x1
 #define INFINIPATH_IBCS_L_STATE_ARM		0x2
 #define INFINIPATH_IBCS_L_STATE_ACTIVE		0x3
 #define INFINIPATH_IBCS_L_STATE_ACT_DEFER	0x4
 
-/* combination link status states that we use with some frequency */
-#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
-		<< INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
-		(INFINIPATH_IBCS_LINKSTATE_MASK \
-		<<INFINIPATH_IBCS_LINKSTATE_SHIFT))
-#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
-		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
-		(INFINIPATH_IBCS_LT_STATE_LINKUP \
-		<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
-#define IPATH_IBSTATE_ARM ((INFINIPATH_IBCS_L_STATE_ARM \
-		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
-		(INFINIPATH_IBCS_LT_STATE_LINKUP \
-		<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
-#define IPATH_IBSTATE_ACTIVE ((INFINIPATH_IBCS_L_STATE_ACTIVE \
-		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
-		(INFINIPATH_IBCS_LT_STATE_LINKUP \
-		<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
 
 /* kr_extstatus bits */
 #define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
@@ -286,8 +304,7 @@
 /* L1 Power down; use with RXDETECT, Otherwise not used on IB side */
 #define INFINIPATH_SERDC0_L1PWR_DN	 0xF0ULL
 
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET          0x7ULL
+/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
 #define INFINIPATH_XGXS_RX_POL_SHIFT 19
 #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
 
@@ -417,6 +434,29 @@
 	ipath_kreg kr_pcieq1serdesconfig0;
 	ipath_kreg kr_pcieq1serdesconfig1;
 	ipath_kreg kr_pcieq1serdesstatus;
+	ipath_kreg kr_hrtbt_guid;
+	ipath_kreg kr_ibcddrctrl;
+	ipath_kreg kr_ibcddrstatus;
+	ipath_kreg kr_jintreload;
+
+	/* send dma related regs */
+	ipath_kreg kr_senddmabase;
+	ipath_kreg kr_senddmalengen;
+	ipath_kreg kr_senddmatail;
+	ipath_kreg kr_senddmahead;
+	ipath_kreg kr_senddmaheadaddr;
+	ipath_kreg kr_senddmabufmask0;
+	ipath_kreg kr_senddmabufmask1;
+	ipath_kreg kr_senddmabufmask2;
+	ipath_kreg kr_senddmastatus;
+
+	/* SerDes related regs (IBA7220-only) */
+	ipath_kreg kr_ibserdesctrl;
+	ipath_kreg kr_ib_epbacc;
+	ipath_kreg kr_ib_epbtrans;
+	ipath_kreg kr_pcie_epbacc;
+	ipath_kreg kr_pcie_epbtrans;
+	ipath_kreg kr_ib_ddsrxeq;
 };
 
 struct ipath_cregs {
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index a59bdbd..8ac5c1d 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -310,7 +310,7 @@
 	switch (wqe->wr.opcode) {
 	case IB_WR_SEND_WITH_IMM:
 		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.imm_data = wqe->wr.imm_data;
+		wc.imm_data = wqe->wr.ex.imm_data;
 		/* FALLTHROUGH */
 	case IB_WR_SEND:
 		if (!ipath_get_rwqe(qp, 0)) {
@@ -339,7 +339,7 @@
 			goto err;
 		}
 		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.imm_data = wqe->wr.imm_data;
+		wc.imm_data = wqe->wr.ex.imm_data;
 		if (!ipath_get_rwqe(qp, 1))
 			goto rnr_nak;
 		/* FALLTHROUGH */
@@ -483,14 +483,16 @@
 
 static void want_buffer(struct ipath_devdata *dd)
 {
-	unsigned long flags;
+	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) {
+		unsigned long flags;
 
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+		dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+				 dd->ipath_sendctrl);
+		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	}
 }
 
 /**
diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220.c b/drivers/infiniband/hw/ipath/ipath_sd7220.c
new file mode 100644
index 0000000..aa47eb5
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sd7220.c
@@ -0,0 +1,1462 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the SerDes
+ * on the InfiniPath 7220 chip.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+#include "ipath_7220.h"
+
+/*
+ * The IBSerDesMappTable is a memory that holds values to be stored in
+ * various SerDes registers by IBC. It is not part of the normal kregs
+ * map and is used in exactly one place, hence the #define below.
+ */
+#define KR_IBSerDesMappTable (0x94000 / (sizeof(uint64_t)))
+
+/*
+ * Below used for sdnum parameter, selecting one of the two sections
+ * used for PCIe, or the single SerDes used for IB.
+ */
+#define PCIE_SERDES0 0
+#define PCIE_SERDES1 1
+
+/*
+ * The EPB requires addressing in a particular form. EPB_LOC() is intended
+ * to make #definitions a little more readable.
+ */
+#define EPB_ADDR_SHF 8
+#define EPB_LOC(chn, elt, reg) \
+	(((elt & 0xf) | ((chn & 7) << 4) | ((reg & 0x3f) << 9)) << \
+	 EPB_ADDR_SHF)
+#define EPB_IB_QUAD0_CS_SHF (25)
+#define EPB_IB_QUAD0_CS (1U <<  EPB_IB_QUAD0_CS_SHF)
+#define EPB_IB_UC_CS_SHF (26)
+#define EPB_PCIE_UC_CS_SHF (27)
+#define EPB_GLOBAL_WR (1U << (EPB_ADDR_SHF + 8))
+
+/* Forward declarations. */
+static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
+				u32 data, u32 mask);
+static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
+			     int mask);
+static int ipath_sd_trimdone_poll(struct ipath_devdata *dd);
+static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd,
+				      const char *where);
+static int ipath_sd_setvals(struct ipath_devdata *dd);
+static int ipath_sd_early(struct ipath_devdata *dd);
+static int ipath_sd_dactrim(struct ipath_devdata *dd);
+/* Set the registers that IBC may muck with to their default "preset" values */
+int ipath_sd7220_presets(struct ipath_devdata *dd);
+static int ipath_internal_presets(struct ipath_devdata *dd);
+/* Tweak the register (CMUCTRL5) that contains the TRIMSELF controls */
+static int ipath_sd_trimself(struct ipath_devdata *dd, int val);
+static int epb_access(struct ipath_devdata *dd, int sdnum, int claim);
+
+void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup);
+
+/*
+ * Below keeps track of whether the "once per power-on" initialization has
+ * been done, because uC code Version 1.32.17 or higher allows the uC to
+ * be reset at will, and Automatic Equalization may require it. So the
+ * state of the reset "pin", as reflected in was_reset parameter to
+ * ipath_sd7220_init() is no longer valid. Instead, we check for the
+ * actual uC code having been loaded.
+ */
+static int ipath_ibsd_ucode_loaded(struct ipath_devdata *dd)
+{
+	if (!dd->serdes_first_init_done && (ipath_sd7220_ib_vfy(dd) > 0))
+		dd->serdes_first_init_done = 1;
+	return dd->serdes_first_init_done;
+}
+
+/* repeat #define for local use. "Real" #define is in ipath_iba7220.c */
+#define INFINIPATH_HWE_IB_UC_MEMORYPARITYERR      0x0000004000000000ULL
+#define IB_MPREG5 (EPB_LOC(6, 0, 0xE) | (1L << EPB_IB_UC_CS_SHF))
+#define IB_MPREG6 (EPB_LOC(6, 0, 0xF) | (1U << EPB_IB_UC_CS_SHF))
+#define UC_PAR_CLR_D 8
+#define UC_PAR_CLR_M 0xC
+#define IB_CTRL2(chn) (EPB_LOC(chn, 7, 3) | EPB_IB_QUAD0_CS)
+#define START_EQ1(chan) EPB_LOC(chan, 7, 0x27)
+
+void ipath_sd7220_clr_ibpar(struct ipath_devdata *dd)
+{
+	int ret;
+
+	/* clear, then re-enable parity errs */
+	ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6,
+		UC_PAR_CLR_D, UC_PAR_CLR_M);
+	if (ret < 0) {
+		ipath_dev_err(dd, "Failed clearing IBSerDes Parity err\n");
+		goto bail;
+	}
+	ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0,
+		UC_PAR_CLR_M);
+
+	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	udelay(4);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+		INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
+	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+bail:
+	return;
+}
+
+/*
+ * After a reset or other unusual event, the epb interface may need
+ * to be re-synchronized, between the host and the uC.
+ * returns <0 for failure to resync within IBSD_RESYNC_TRIES (not expected)
+ */
+#define IBSD_RESYNC_TRIES 3
+#define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS)
+#define IB_CMUDONE(chn) (EPB_LOC((chn), 7, 0xF) | EPB_IB_QUAD0_CS)
+
+static int ipath_resync_ibepb(struct ipath_devdata *dd)
+{
+	int ret, pat, tries, chn;
+	u32 loc;
+
+	ret = -1;
+	chn = 0;
+	for (tries = 0; tries < (4 * IBSD_RESYNC_TRIES); ++tries) {
+		loc = IB_PGUDP(chn);
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			ipath_dev_err(dd, "Failed read in resync\n");
+			continue;
+		}
+		if (ret != 0xF0 && ret != 0x55 && tries == 0)
+			ipath_dev_err(dd, "unexpected pattern in resync\n");
+		pat = ret ^ 0xA5; /* alternate F0 and 55 */
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF);
+		if (ret < 0) {
+			ipath_dev_err(dd, "Failed write in resync\n");
+			continue;
+		}
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			ipath_dev_err(dd, "Failed re-read in resync\n");
+			continue;
+		}
+		if (ret != pat) {
+			ipath_dev_err(dd, "Failed compare1 in resync\n");
+			continue;
+		}
+		loc = IB_CMUDONE(chn);
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			ipath_dev_err(dd, "Failed CMUDONE rd in resync\n");
+			continue;
+		}
+		if ((ret & 0x70) != ((chn << 4) | 0x40)) {
+			ipath_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n",
+				ret, chn);
+			continue;
+		}
+		if (++chn == 4)
+			break;  /* Success */
+	}
+	ipath_cdbg(VERBOSE, "Resync in %d tries\n", tries);
+	return (ret > 0) ? 0 : ret;
+}
+
+/*
+ * Localize the stuff that should be done to change IB uC reset
+ * returns <0 for errors.
+ */
+static int ipath_ibsd_reset(struct ipath_devdata *dd, int assert_rst)
+{
+	u64 rst_val;
+	int ret = 0;
+	unsigned long flags;
+
+	rst_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
+	if (assert_rst) {
+		/*
+		 * Vendor recommends "interrupting" uC before reset, to
+		 * minimize possible glitches.
+		 */
+		spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+		epb_access(dd, IB_7220_SERDES, 1);
+		rst_val |= 1ULL;
+		/* Squelch possible parity error from _asserting_ reset */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+			dd->ipath_hwerrmask &
+			~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, rst_val);
+		/* flush write, delay to ensure it took effect */
+		ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+		udelay(2);
+		/* once it's reset, can remove interrupt */
+		epb_access(dd, IB_7220_SERDES, -1);
+		spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+	} else {
+		/*
+		 * Before we de-assert reset, we need to deal with
+		 * possible glitch on the Parity-error line.
+		 * Suppress it around the reset, both in chip-level
+		 * hwerrmask and in IB uC control reg. uC will allow
+		 * it again during startup.
+		 */
+		u64 val;
+		rst_val &= ~(1ULL);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+			dd->ipath_hwerrmask &
+			~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
+
+		ret = ipath_resync_ibepb(dd);
+		if (ret < 0)
+			ipath_dev_err(dd, "unable to re-sync IB EPB\n");
+
+		/* set uC control regs to suppress parity errs */
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1);
+		if (ret < 0)
+			goto bail;
+		/* IB uC code past Version 1.32.17 allow suppression of wdog */
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80,
+			0x80);
+		if (ret < 0) {
+			ipath_dev_err(dd, "Failed to set WDOG disable\n");
+			goto bail;
+		}
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, rst_val);
+		/* flush write, delay for startup */
+		ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+		udelay(1);
+		/* clear, then re-enable parity errs */
+		ipath_sd7220_clr_ibpar(dd);
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+		if (val & INFINIPATH_HWE_IB_UC_MEMORYPARITYERR) {
+			ipath_dev_err(dd, "IBUC Parity still set after RST\n");
+			dd->ipath_hwerrmask &=
+				~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR;
+		}
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+			dd->ipath_hwerrmask);
+	}
+
+bail:
+	return ret;
+}
+
+static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd,
+       const char *where)
+{
+	int ret, chn, baduns;
+	u64 val;
+
+	if (!where)
+		where = "?";
+
+	/* give time for reset to settle out in EPB */
+	udelay(2);
+
+	ret = ipath_resync_ibepb(dd);
+	if (ret < 0)
+		ipath_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where);
+
+	/* Do "sacrificial read" to get EPB in sane state after reset */
+	ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0);
+	if (ret < 0)
+		ipath_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where);
+
+	/* Check/show "summary" Trim-done bit in IBCStatus */
+	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+	if (val & (1ULL << 11))
+		ipath_cdbg(VERBOSE, "IBCS TRIMDONE set (%s)\n", where);
+	else
+		ipath_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where);
+
+	udelay(2);
+
+	ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80);
+	if (ret < 0)
+		ipath_dev_err(dd, "Failed Dummy RMW, (%s)\n", where);
+	udelay(10);
+
+	baduns = 0;
+
+	for (chn = 3; chn >= 0; --chn) {
+		/* Read CTRL reg for each channel to check TRIMDONE */
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+			IB_CTRL2(chn), 0, 0);
+		if (ret < 0)
+			ipath_dev_err(dd, "Failed checking TRIMDONE, chn %d"
+				" (%s)\n", chn, where);
+
+		if (!(ret & 0x10)) {
+			int probe;
+			baduns |= (1 << chn);
+			ipath_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
+				" (%s)\n", chn, ret, where);
+			probe = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+				IB_PGUDP(0), 0, 0);
+			ipath_dev_err(dd, "probe is %d (%02X)\n",
+				probe, probe);
+			probe = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+				IB_CTRL2(chn), 0, 0);
+			ipath_dev_err(dd, "re-read: %d (%02X)\n",
+				probe, probe);
+			ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+				IB_CTRL2(chn), 0x10, 0x10);
+			if (ret < 0)
+				ipath_dev_err(dd,
+					"Err on TRIMDONE rewrite1\n");
+		}
+	}
+	for (chn = 3; chn >= 0; --chn) {
+		/* Read CTRL reg for each channel to check TRIMDONE */
+		if (baduns & (1 << chn)) {
+			ipath_dev_err(dd,
+				"Reseting TRIMDONE on chn %d (%s)\n",
+				chn, where);
+			ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+				IB_CTRL2(chn), 0x10, 0x10);
+			if (ret < 0)
+				ipath_dev_err(dd, "Failed re-setting "
+					"TRIMDONE, chn %d (%s)\n",
+					chn, where);
+		}
+	}
+}
+
+/*
+ * Below is portion of IBA7220-specific bringup_serdes() that actually
+ * deals with registers and memory within the SerDes itself.
+ * Post IB uC code version 1.32.17, was_reset being 1 is not really
+ * informative, so we double-check.
+ */
+int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset)
+{
+	int ret = 1; /* default to failure */
+	int first_reset;
+	int val_stat;
+
+	if (!was_reset) {
+		/* entered with reset not asserted, we need to do it */
+		ipath_ibsd_reset(dd, 1);
+		ipath_sd_trimdone_monitor(dd, "Driver-reload");
+	}
+
+	/* Substitute our deduced value for was_reset */
+	ret = ipath_ibsd_ucode_loaded(dd);
+	if (ret < 0) {
+		ret = 1;
+		goto done;
+	}
+	first_reset = !ret; /* First reset if IBSD uCode not yet loaded */
+
+	/*
+	 * Alter some regs per vendor latest doc, reset-defaults
+	 * are not right for IB.
+	 */
+	ret = ipath_sd_early(dd);
+	if (ret < 0) {
+		ipath_dev_err(dd, "Failed to set IB SERDES early defaults\n");
+		ret = 1;
+		goto done;
+	}
+
+	/*
+	 * Set DAC manual trim IB.
+	 * We only do this once after chip has been reset (usually
+	 * same as once per system boot).
+	 */
+	if (first_reset) {
+		ret = ipath_sd_dactrim(dd);
+		if (ret < 0) {
+			ipath_dev_err(dd, "Failed IB SERDES DAC trim\n");
+			ret = 1;
+			goto done;
+		}
+	}
+
+	/*
+	 * Set various registers (DDS and RXEQ) that will be
+	 * controlled by IBC (in 1.2 mode) to reasonable preset values
+	 * Calling the "internal" version avoids the "check for needed"
+	 * and "trimdone monitor" that might be counter-productive.
+	 */
+	ret = ipath_internal_presets(dd);
+	if (ret < 0) {
+		ipath_dev_err(dd, "Failed to set IB SERDES presets\n");
+		ret = 1;
+		goto done;
+	}
+	ret = ipath_sd_trimself(dd, 0x80);
+	if (ret < 0) {
+		ipath_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n");
+		ret = 1;
+		goto done;
+	}
+
+	/* Load image, then try to verify */
+	ret = 0;	/* Assume success */
+	if (first_reset) {
+		int vfy;
+		int trim_done;
+		ipath_dbg("SerDes uC was reset, reloading PRAM\n");
+		ret = ipath_sd7220_ib_load(dd);
+		if (ret < 0) {
+			ipath_dev_err(dd, "Failed to load IB SERDES image\n");
+			ret = 1;
+			goto done;
+		}
+
+		/* Loaded image, try to verify */
+		vfy = ipath_sd7220_ib_vfy(dd);
+		if (vfy != ret) {
+			ipath_dev_err(dd, "SERDES PRAM VFY failed\n");
+			ret = 1;
+			goto done;
+		}
+		/*
+		 * Loaded and verified. Almost good...
+		 * hold "success" in ret
+		 */
+		ret = 0;
+
+		/*
+		 * Prev steps all worked, continue bringup
+		 * De-assert RESET to uC, only in first reset, to allow
+		 * trimming.
+		 *
+		 * Since our default setup sets START_EQ1 to
+		 * PRESET, we need to clear that for this very first run.
+		 */
+		ret = ibsd_mod_allchnls(dd, START_EQ1(0), 0, 0x38);
+		if (ret < 0) {
+			ipath_dev_err(dd, "Failed clearing START_EQ1\n");
+			ret = 1;
+			goto done;
+		}
+
+		ipath_ibsd_reset(dd, 0);
+		/*
+		 * If this is not the first reset, trimdone should be set
+		 * already.
+		 */
+		trim_done = ipath_sd_trimdone_poll(dd);
+		/*
+		 * Whether or not trimdone succeeded, we need to put the
+		 * uC back into reset to avoid a possible fight with the
+		 * IBC state-machine.
+		 */
+		ipath_ibsd_reset(dd, 1);
+
+		if (!trim_done) {
+			ipath_dev_err(dd, "No TRIMDONE seen\n");
+			ret = 1;
+			goto done;
+		}
+
+		ipath_sd_trimdone_monitor(dd, "First-reset");
+		/* Remember so we do not re-do the load, dactrim, etc. */
+		dd->serdes_first_init_done = 1;
+	}
+	/*
+	 * Setup for channel training and load values for
+	 * RxEq and DDS in tables used by IBC in IB1.2 mode
+	 */
+
+	val_stat = ipath_sd_setvals(dd);
+	if (val_stat < 0)
+		ret = 1;
+done:
+	/* start relock timer regardless, but start at 1 second */
+	ipath_set_relock_poll(dd, -1);
+	return ret;
+}
+
+#define EPB_ACC_REQ 1
+#define EPB_ACC_GNT 0x100
+#define EPB_DATA_MASK 0xFF
+#define EPB_RD (1ULL << 24)
+#define EPB_TRANS_RDY (1ULL << 31)
+#define EPB_TRANS_ERR (1ULL << 30)
+#define EPB_TRANS_TRIES 5
+
+/*
+ * query, claim, release ownership of the EPB (External Parallel Bus)
+ * for a specified SERDES.
+ * the "claim" parameter is >0 to claim, <0 to release, 0 to query.
+ * Returns <0 for errors, >0 if we had ownership, else 0.
+ */
+static int epb_access(struct ipath_devdata *dd, int sdnum, int claim)
+{
+	u16 acc;
+	u64 accval;
+	int owned = 0;
+	u64 oct_sel = 0;
+
+	switch (sdnum) {
+	case IB_7220_SERDES :
+		/*
+		 * The IB SERDES "ownership" is fairly simple. A single each
+		 * request/grant.
+		 */
+		acc = dd->ipath_kregs->kr_ib_epbacc;
+		break;
+	case PCIE_SERDES0 :
+	case PCIE_SERDES1 :
+		/* PCIe SERDES has two "octants", need to select which */
+		acc = dd->ipath_kregs->kr_pcie_epbacc;
+		oct_sel = (2 << (sdnum - PCIE_SERDES0));
+		break;
+	default :
+		return 0;
+	}
+
+	/* Make sure any outstanding transaction was seen */
+	ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	udelay(15);
+
+	accval = ipath_read_kreg32(dd, acc);
+
+	owned = !!(accval & EPB_ACC_GNT);
+	if (claim < 0) {
+		/* Need to release */
+		u64 pollval;
+		/*
+		 * The only writeable bits are the request and CS.
+		 * Both should be clear
+		 */
+		u64 newval = 0;
+		ipath_write_kreg(dd, acc, newval);
+		/* First read after write is not trustworthy */
+		pollval = ipath_read_kreg32(dd, acc);
+		udelay(5);
+		pollval = ipath_read_kreg32(dd, acc);
+		if (pollval & EPB_ACC_GNT)
+			owned = -1;
+	} else if (claim > 0) {
+		/* Need to claim */
+		u64 pollval;
+		u64 newval = EPB_ACC_REQ | oct_sel;
+		ipath_write_kreg(dd, acc, newval);
+		/* First read after write is not trustworthy */
+		pollval = ipath_read_kreg32(dd, acc);
+		udelay(5);
+		pollval = ipath_read_kreg32(dd, acc);
+		if (!(pollval & EPB_ACC_GNT))
+			owned = -1;
+	}
+	return owned;
+}
+
+/*
+ * Lemma to deal with race condition of write..read to epb regs
+ */
+static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
+{
+	int tries;
+	u64 transval;
+
+
+	ipath_write_kreg(dd, reg, i_val);
+	/* Throw away first read, as RDY bit may be stale */
+	transval = ipath_read_kreg64(dd, reg);
+
+	for (tries = EPB_TRANS_TRIES; tries; --tries) {
+		transval = ipath_read_kreg32(dd, reg);
+		if (transval & EPB_TRANS_RDY)
+			break;
+		udelay(5);
+	}
+	if (transval & EPB_TRANS_ERR)
+		return -1;
+	if (tries > 0 && o_vp)
+		*o_vp = transval;
+	return tries;
+}
+
+/**
+ *
+ * ipath_sd7220_reg_mod - modify SERDES register
+ * @dd: the infinipath device
+ * @sdnum: which SERDES to access
+ * @loc: location - channel, element, register, as packed by EPB_LOC() macro.
+ * @wd: Write Data - value to set in register
+ * @mask: ones where data should be spliced into reg.
+ *
+ * Basic register read/modify/write, with un-needed acesses elided. That is,
+ * a mask of zero will prevent write, while a mask of 0xFF will prevent read.
+ * returns current (presumed, if a write was done) contents of selected
+ * register, or <0 if errors.
+ */
+static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
+				u32 wd, u32 mask)
+{
+	u16 trans;
+	u64 transval;
+	int owned;
+	int tries, ret;
+	unsigned long flags;
+
+	switch (sdnum) {
+	case IB_7220_SERDES :
+		trans = dd->ipath_kregs->kr_ib_epbtrans;
+		break;
+	case PCIE_SERDES0 :
+	case PCIE_SERDES1 :
+		trans = dd->ipath_kregs->kr_pcie_epbtrans;
+		break;
+	default :
+		return -1;
+	}
+
+	/*
+	 * All access is locked in software (vs other host threads) and
+	 * hardware (vs uC access).
+	 */
+	spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+
+	owned = epb_access(dd, sdnum, 1);
+	if (owned < 0) {
+		spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+		return -1;
+	}
+	ret = 0;
+	for (tries = EPB_TRANS_TRIES; tries; --tries) {
+		transval = ipath_read_kreg32(dd, trans);
+		if (transval & EPB_TRANS_RDY)
+			break;
+		udelay(5);
+	}
+
+	if (tries > 0) {
+		tries = 1;	/* to make read-skip work */
+		if (mask != 0xFF) {
+			/*
+			 * Not a pure write, so need to read.
+			 * loc encodes chip-select as well as address
+			 */
+			transval = loc | EPB_RD;
+			tries = epb_trans(dd, trans, transval, &transval);
+		}
+		if (tries > 0 && mask != 0) {
+			/*
+			 * Not a pure read, so need to write.
+			 */
+			wd = (wd & mask) | (transval & ~mask);
+			transval = loc | (wd & EPB_DATA_MASK);
+			tries = epb_trans(dd, trans, transval, &transval);
+		}
+	}
+	/* else, failed to see ready, what error-handling? */
+
+	/*
+	 * Release bus. Failure is an error.
+	 */
+	if (epb_access(dd, sdnum, -1) < 0)
+		ret = -1;
+	else
+		ret = transval & EPB_DATA_MASK;
+
+	spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+	if (tries <= 0)
+		ret = -1;
+	return ret;
+}
+
+#define EPB_ROM_R (2)
+#define EPB_ROM_W (1)
+/*
+ * Below, all uC-related, use appropriate UC_CS, depending
+ * on which SerDes is used.
+ */
+#define EPB_UC_CTL EPB_LOC(6, 0, 0)
+#define EPB_MADDRL EPB_LOC(6, 0, 2)
+#define EPB_MADDRH EPB_LOC(6, 0, 3)
+#define EPB_ROMDATA EPB_LOC(6, 0, 4)
+#define EPB_RAMDATA EPB_LOC(6, 0, 5)
+
+/* Transfer date to/from uC Program RAM of IB or PCIe SerDes */
+static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
+			       u8 *buf, int cnt, int rd_notwr)
+{
+	u16 trans;
+	u64 transval;
+	u64 csbit;
+	int owned;
+	int tries;
+	int sofar;
+	int addr;
+	int ret;
+	unsigned long flags;
+	const char *op;
+
+	/* Pick appropriate transaction reg and "Chip select" for this serdes */
+	switch (sdnum) {
+	case IB_7220_SERDES :
+		csbit = 1ULL << EPB_IB_UC_CS_SHF;
+		trans = dd->ipath_kregs->kr_ib_epbtrans;
+		break;
+	case PCIE_SERDES0 :
+	case PCIE_SERDES1 :
+		/* PCIe SERDES has uC "chip select" in different bit, too */
+		csbit = 1ULL << EPB_PCIE_UC_CS_SHF;
+		trans = dd->ipath_kregs->kr_pcie_epbtrans;
+		break;
+	default :
+		return -1;
+	}
+
+	op = rd_notwr ? "Rd" : "Wr";
+	spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+
+	owned = epb_access(dd, sdnum, 1);
+	if (owned < 0) {
+		spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+		ipath_dbg("Could not get %s access to %s EPB: %X, loc %X\n",
+			op, (sdnum == IB_7220_SERDES) ? "IB" : "PCIe",
+			owned, loc);
+		return -1;
+	}
+
+	/*
+	 * In future code, we may need to distinguish several address ranges,
+	 * and select various memories based on this. For now, just trim
+	 * "loc" (location including address and memory select) to
+	 * "addr" (address within memory). we will only support PRAM
+	 * The memory is 8KB.
+	 */
+	addr = loc & 0x1FFF;
+	for (tries = EPB_TRANS_TRIES; tries; --tries) {
+		transval = ipath_read_kreg32(dd, trans);
+		if (transval & EPB_TRANS_RDY)
+			break;
+		udelay(5);
+	}
+
+	sofar = 0;
+	if (tries <= 0)
+		ipath_dbg("No initial RDY on EPB access request\n");
+	else {
+		/*
+		 * Every "memory" access is doubly-indirect.
+		 * We set two bytes of address, then read/write
+		 * one or mores bytes of data.
+		 */
+
+		/* First, we set control to "Read" or "Write" */
+		transval = csbit | EPB_UC_CTL |
+			(rd_notwr ? EPB_ROM_R : EPB_ROM_W);
+		tries = epb_trans(dd, trans, transval, &transval);
+		if (tries <= 0)
+			ipath_dbg("No EPB response to uC %s cmd\n", op);
+		while (tries > 0 && sofar < cnt) {
+			if (!sofar) {
+				/* Only set address at start of chunk */
+				int addrbyte = (addr + sofar) >> 8;
+				transval = csbit | EPB_MADDRH | addrbyte;
+				tries = epb_trans(dd, trans, transval,
+						  &transval);
+				if (tries <= 0) {
+					ipath_dbg("No EPB response ADDRH\n");
+					break;
+				}
+				addrbyte = (addr + sofar) & 0xFF;
+				transval = csbit | EPB_MADDRL | addrbyte;
+				tries = epb_trans(dd, trans, transval,
+						 &transval);
+				if (tries <= 0) {
+					ipath_dbg("No EPB response ADDRL\n");
+					break;
+				}
+			}
+
+			if (rd_notwr)
+				transval = csbit | EPB_ROMDATA | EPB_RD;
+			else
+				transval = csbit | EPB_ROMDATA | buf[sofar];
+			tries = epb_trans(dd, trans, transval, &transval);
+			if (tries <= 0) {
+				ipath_dbg("No EPB response DATA\n");
+				break;
+			}
+			if (rd_notwr)
+				buf[sofar] = transval & EPB_DATA_MASK;
+			++sofar;
+		}
+		/* Finally, clear control-bit for Read or Write */
+		transval = csbit | EPB_UC_CTL;
+		tries = epb_trans(dd, trans, transval, &transval);
+		if (tries <= 0)
+			ipath_dbg("No EPB response to drop of uC %s cmd\n", op);
+	}
+
+	ret = sofar;
+	/* Release bus. Failure is an error */
+	if (epb_access(dd, sdnum, -1) < 0)
+		ret = -1;
+
+	spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+	if (tries <= 0) {
+		ipath_dbg("SERDES PRAM %s failed after %d bytes\n", op, sofar);
+		ret = -1;
+	}
+	return ret;
+}
+
+#define PROG_CHUNK 64
+
+int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum,
+	u8 *img, int len, int offset)
+{
+	int cnt, sofar, req;
+
+	sofar = 0;
+	while (sofar < len) {
+		req = len - sofar;
+		if (req > PROG_CHUNK)
+			req = PROG_CHUNK;
+		cnt = ipath_sd7220_ram_xfer(dd, sdnum, offset + sofar,
+					  img + sofar, req, 0);
+		if (cnt < req) {
+			sofar = -1;
+			break;
+		}
+		sofar += req;
+	}
+	return sofar;
+}
+
+#define VFY_CHUNK 64
+#define SD_PRAM_ERROR_LIMIT 42
+
+int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum,
+	const u8 *img, int len, int offset)
+{
+	int cnt, sofar, req, idx, errors;
+	unsigned char readback[VFY_CHUNK];
+
+	errors = 0;
+	sofar = 0;
+	while (sofar < len) {
+		req = len - sofar;
+		if (req > VFY_CHUNK)
+			req = VFY_CHUNK;
+		cnt = ipath_sd7220_ram_xfer(dd, sdnum, sofar + offset,
+					  readback, req, 1);
+		if (cnt < req) {
+			/* failed in read itself */
+			sofar = -1;
+			break;
+		}
+		for (idx = 0; idx < cnt; ++idx) {
+			if (readback[idx] != img[idx+sofar])
+				++errors;
+		}
+		sofar += cnt;
+	}
+	return errors ? -errors : sofar;
+}
+
+/* IRQ not set up at this point in init, so we poll. */
+#define IB_SERDES_TRIM_DONE (1ULL << 11)
+#define TRIM_TMO (30)
+
+static int ipath_sd_trimdone_poll(struct ipath_devdata *dd)
+{
+	int trim_tmo, ret;
+	uint64_t val;
+
+	/*
+	 * Default to failure, so IBC will not start
+	 * without IB_SERDES_TRIM_DONE.
+	 */
+	ret = 0;
+	for (trim_tmo = 0; trim_tmo < TRIM_TMO; ++trim_tmo) {
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+		if (val & IB_SERDES_TRIM_DONE) {
+			ipath_cdbg(VERBOSE, "TRIMDONE after %d\n", trim_tmo);
+			ret = 1;
+			break;
+		}
+		msleep(10);
+	}
+	if (trim_tmo >= TRIM_TMO) {
+		ipath_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo);
+		ret = 0;
+	}
+	return ret;
+}
+
+#define TX_FAST_ELT (9)
+
+/*
+ * Set the "negotiation" values for SERDES. These are used by the IB1.2
+ * link negotiation. Macros below are attempt to keep the values a
+ * little more human-editable.
+ * First, values related to Drive De-emphasis Settings.
+ */
+
+#define NUM_DDS_REGS 6
+#define DDS_REG_MAP 0x76A910 /* LSB-first list of regs (in elt 9) to mod */
+
+#define DDS_VAL(amp_d, main_d, ipst_d, ipre_d, amp_s, main_s, ipst_s, ipre_s) \
+	{ { ((amp_d & 0x1F) << 1) | 1, ((amp_s & 0x1F) << 1) | 1, \
+	  (main_d << 3) | 4 | (ipre_d >> 2), \
+	  (main_s << 3) | 4 | (ipre_s >> 2), \
+	  ((ipst_d & 0xF) << 1) | ((ipre_d & 3) << 6) | 0x21, \
+	  ((ipst_s & 0xF) << 1) | ((ipre_s & 3) << 6) | 0x21 } }
+
+static struct dds_init {
+	uint8_t reg_vals[NUM_DDS_REGS];
+} dds_init_vals[] = {
+	/*       DDR(FDR)       SDR(HDR)   */
+	/* Vendor recommends below for 3m cable */
+#define DDS_3M 0
+	DDS_VAL(31, 19, 12, 0, 29, 22,  9, 0),
+	DDS_VAL(31, 12, 15, 4, 31, 15, 15, 1),
+	DDS_VAL(31, 13, 15, 3, 31, 16, 15, 0),
+	DDS_VAL(31, 14, 15, 2, 31, 17, 14, 0),
+	DDS_VAL(31, 15, 15, 1, 31, 18, 13, 0),
+	DDS_VAL(31, 16, 15, 0, 31, 19, 12, 0),
+	DDS_VAL(31, 17, 14, 0, 31, 20, 11, 0),
+	DDS_VAL(31, 18, 13, 0, 30, 21, 10, 0),
+	DDS_VAL(31, 20, 11, 0, 28, 23,  8, 0),
+	DDS_VAL(31, 21, 10, 0, 27, 24,  7, 0),
+	DDS_VAL(31, 22,  9, 0, 26, 25,  6, 0),
+	DDS_VAL(30, 23,  8, 0, 25, 26,  5, 0),
+	DDS_VAL(29, 24,  7, 0, 23, 27,  4, 0),
+	/* Vendor recommends below for 1m cable */
+#define DDS_1M 13
+	DDS_VAL(28, 25,  6, 0, 21, 28,  3, 0),
+	DDS_VAL(27, 26,  5, 0, 19, 29,  2, 0),
+	DDS_VAL(25, 27,  4, 0, 17, 30,  1, 0)
+};
+
+/*
+ * Next, values related to Receive Equalization.
+ * In comments, FDR (Full) is IB DDR, HDR (Half) is IB SDR
+ */
+/* Hardware packs an element number and register address thus: */
+#define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4))
+#define RXEQ_VAL(elt, adr, val0, val1, val2, val3) \
+	{RXEQ_INIT_RDESC((elt), (adr)), {(val0), (val1), (val2), (val3)} }
+
+#define RXEQ_VAL_ALL(elt, adr, val)  \
+	{RXEQ_INIT_RDESC((elt), (adr)), {(val), (val), (val), (val)} }
+
+#define RXEQ_SDR_DFELTH 0
+#define RXEQ_SDR_TLTH 0
+#define RXEQ_SDR_G1CNT_Z1CNT 0x11
+#define RXEQ_SDR_ZCNT 23
+
+static struct rxeq_init {
+	u16 rdesc;	/* in form used in SerDesDDSRXEQ */
+	u8  rdata[4];
+} rxeq_init_vals[] = {
+	/* Set Rcv Eq. to Preset node */
+	RXEQ_VAL_ALL(7, 0x27, 0x10),
+	/* Set DFELTHFDR/HDR thresholds */
+	RXEQ_VAL(7, 8,    0, 0, 0, 0), /* FDR */
+	RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */
+	/* Set TLTHFDR/HDR theshold */
+	RXEQ_VAL(7, 9,    2, 2, 2, 2), /* FDR */
+	RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR */
+	/* Set Preamp setting 2 (ZFR/ZCNT) */
+	RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR */
+	RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR */
+	/* Set Preamp DC gain and Setting 1 (GFR/GHR) */
+	RXEQ_VAL(7, 0x1E, 0x10, 0x10, 0x10, 0x10), /* FDR */
+	RXEQ_VAL(7, 0x1F, 0x10, 0x10, 0x10, 0x10), /* HDR */
+	/* Toggle RELOCK (in VCDL_CTRL0) to lock to data */
+	RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */
+	RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */
+};
+
+/* There are 17 values from vendor, but IBC only accesses the first 16 */
+#define DDS_ROWS (16)
+#define RXEQ_ROWS ARRAY_SIZE(rxeq_init_vals)
+
+static int ipath_sd_setvals(struct ipath_devdata *dd)
+{
+	int idx, midx;
+	int min_idx;	 /* Minimum index for this portion of table */
+	uint32_t dds_reg_map;
+	u64 __iomem *taddr, *iaddr;
+	uint64_t data;
+	uint64_t sdctl;
+
+	taddr = dd->ipath_kregbase + KR_IBSerDesMappTable;
+	iaddr = dd->ipath_kregbase + dd->ipath_kregs->kr_ib_ddsrxeq;
+
+	/*
+	 * Init the DDS section of the table.
+	 * Each "row" of the table provokes NUM_DDS_REG writes, to the
+	 * registers indicated in DDS_REG_MAP.
+	 */
+	sdctl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
+	sdctl = (sdctl & ~(0x1f << 8)) | (NUM_DDS_REGS << 8);
+	sdctl = (sdctl & ~(0x1f << 13)) | (RXEQ_ROWS << 13);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, sdctl);
+
+	/*
+	 * Iterate down table within loop for each register to store.
+	 */
+	dds_reg_map = DDS_REG_MAP;
+	for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+		data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT;
+		writeq(data, iaddr + idx);
+		mmiowb();
+		ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+		dds_reg_map >>= 4;
+		for (midx = 0; midx < DDS_ROWS; ++midx) {
+			u64 __iomem *daddr = taddr + ((midx << 4) + idx);
+			data = dds_init_vals[midx].reg_vals[idx];
+			writeq(data, daddr);
+			mmiowb();
+			ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+		} /* End inner for (vals for this reg, each row) */
+	} /* end outer for (regs to be stored) */
+
+	/*
+	 * Init the RXEQ section of the table. As explained above the table
+	 * rxeq_init_vals[], this runs in a different order, as the pattern
+	 * of register references is more complex, but there are only
+	 * four "data" values per register.
+	 */
+	min_idx = idx; /* RXEQ indices pick up where DDS left off */
+	taddr += 0x100; /* RXEQ data is in second half of table */
+	/* Iterate through RXEQ register addresses */
+	for (idx = 0; idx < RXEQ_ROWS; ++idx) {
+		int didx; /* "destination" */
+		int vidx;
+
+		/* didx is offset by min_idx to address RXEQ range of regs */
+		didx = idx + min_idx;
+		/* Store the next RXEQ register address */
+		writeq(rxeq_init_vals[idx].rdesc, iaddr + didx);
+		mmiowb();
+		ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+		/* Iterate through RXEQ values */
+		for (vidx = 0; vidx < 4; vidx++) {
+			data = rxeq_init_vals[idx].rdata[vidx];
+			writeq(data, taddr + (vidx << 6) + idx);
+			mmiowb();
+			ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+		}
+	} /* end outer for (Reg-writes for RXEQ) */
+	return 0;
+}
+
+#define CMUCTRL5 EPB_LOC(7, 0, 0x15)
+#define RXHSCTRL0(chan) EPB_LOC(chan, 6, 0)
+#define VCDL_DAC2(chan) EPB_LOC(chan, 6, 5)
+#define VCDL_CTRL0(chan) EPB_LOC(chan, 6, 6)
+#define VCDL_CTRL2(chan) EPB_LOC(chan, 6, 8)
+#define START_EQ2(chan) EPB_LOC(chan, 7, 0x28)
+
+static int ibsd_sto_noisy(struct ipath_devdata *dd, int loc, int val, int mask)
+{
+	int ret = -1;
+	int sloc; /* shifted loc, for messages */
+
+	loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+	sloc = loc >> EPB_ADDR_SHF;
+
+	ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, mask);
+	if (ret < 0)
+		ipath_dev_err(dd, "Write failed: elt %d,"
+			" addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n",
+			(sloc & 0xF), (sloc >> 9) & 0x3f, (sloc >> 4) & 7,
+			val & 0xFF, mask & 0xFF);
+	return ret;
+}
+
+/*
+ * Repeat a "store" across all channels of the IB SerDes.
+ * Although nominally it inherits the "read value" of the last
+ * channel it modified, the only really useful return is <0 for
+ * failure, >= 0 for success. The parameter 'loc' is assumed to
+ * be the location for the channel-0 copy of the register to
+ * be modified.
+ */
+static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
+	int mask)
+{
+	int ret = -1;
+	int chnl;
+
+	if (loc & EPB_GLOBAL_WR) {
+		/*
+		 * Our caller has assured us that we can set all four
+		 * channels at once. Trust that. If mask is not 0xFF,
+		 * we will read the _specified_ channel for our starting
+		 * value.
+		 */
+		loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+		chnl = (loc >> (4 + EPB_ADDR_SHF)) & 7;
+		if (mask != 0xFF) {
+			ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+				loc & ~EPB_GLOBAL_WR, 0, 0);
+			if (ret < 0) {
+				int sloc = loc >> EPB_ADDR_SHF;
+				ipath_dev_err(dd, "pre-read failed: elt %d,"
+					" addr 0x%X, chnl %d\n", (sloc & 0xF),
+					(sloc >> 9) & 0x3f, chnl);
+				return ret;
+			}
+			val = (ret & ~mask) | (val & mask);
+		}
+		loc &=  ~(7 << (4+EPB_ADDR_SHF));
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
+		if (ret < 0) {
+			int sloc = loc >> EPB_ADDR_SHF;
+			ipath_dev_err(dd, "Global WR failed: elt %d,"
+				" addr 0x%X, val %02X\n",
+				(sloc & 0xF), (sloc >> 9) & 0x3f, val);
+		}
+		return ret;
+	}
+	/* Clear "channel" and set CS so we can simply iterate */
+	loc &=  ~(7 << (4+EPB_ADDR_SHF));
+	loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+	for (chnl = 0; chnl < 4; ++chnl) {
+		int cloc;
+		cloc = loc | (chnl << (4+EPB_ADDR_SHF));
+		ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask);
+		if (ret < 0) {
+			int sloc = loc >> EPB_ADDR_SHF;
+			ipath_dev_err(dd, "Write failed: elt %d,"
+				" addr 0x%X, chnl %d, val 0x%02X,"
+				" mask 0x%02X\n",
+				(sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
+				val & 0xFF, mask & 0xFF);
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Set the Tx values normally modified by IBC in IB1.2 mode to default
+ * values, as gotten from first row of init table.
+ */
+static int set_dds_vals(struct ipath_devdata *dd, struct dds_init *ddi)
+{
+	int ret;
+	int idx, reg, data;
+	uint32_t regmap;
+
+	regmap = DDS_REG_MAP;
+	for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+		reg = (regmap & 0xF);
+		regmap >>= 4;
+		data = ddi->reg_vals[idx];
+		/* Vendor says RMW not needed for these regs, use 0xFF mask */
+		ret = ibsd_mod_allchnls(dd, EPB_LOC(0, 9, reg), data, 0xFF);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Set the Rx values normally modified by IBC in IB1.2 mode to default
+ * values, as gotten from selected column of init table.
+ */
+static int set_rxeq_vals(struct ipath_devdata *dd, int vsel)
+{
+	int ret;
+	int ridx;
+	int cnt = ARRAY_SIZE(rxeq_init_vals);
+
+	for (ridx = 0; ridx < cnt; ++ridx) {
+		int elt, reg, val, loc;
+		elt = rxeq_init_vals[ridx].rdesc & 0xF;
+		reg = rxeq_init_vals[ridx].rdesc >> 4;
+		loc = EPB_LOC(0, elt, reg);
+		val = rxeq_init_vals[ridx].rdata[vsel];
+		/* mask of 0xFF, because hardware does full-byte store. */
+		ret = ibsd_mod_allchnls(dd, loc, val, 0xFF);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Set the default values (row 0) for DDR Driver Demphasis.
+ * we do this initially and whenever we turn off IB-1.2
+ * The "default" values for Rx equalization are also stored to
+ * SerDes registers. Formerly (and still default), we used set 2.
+ * For experimenting with cables and link-partners, we allow changing
+ * that via a module parameter.
+ */
+static unsigned ipath_rxeq_set = 2;
+module_param_named(rxeq_default_set, ipath_rxeq_set, uint,
+	S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxeq_default_set,
+	"Which set [0..3] of Rx Equalization values is default");
+
+static int ipath_internal_presets(struct ipath_devdata *dd)
+{
+	int ret = 0;
+
+	ret = set_dds_vals(dd, dds_init_vals + DDS_3M);
+
+	if (ret < 0)
+		ipath_dev_err(dd, "Failed to set default DDS values\n");
+	ret = set_rxeq_vals(dd, ipath_rxeq_set & 3);
+	if (ret < 0)
+		ipath_dev_err(dd, "Failed to set default RXEQ values\n");
+	return ret;
+}
+
+int ipath_sd7220_presets(struct ipath_devdata *dd)
+{
+	int ret = 0;
+
+	if (!dd->ipath_presets_needed)
+		return ret;
+	dd->ipath_presets_needed = 0;
+	/* Assert uC reset, so we don't clash with it. */
+	ipath_ibsd_reset(dd, 1);
+	udelay(2);
+	ipath_sd_trimdone_monitor(dd, "link-down");
+
+	ret = ipath_internal_presets(dd);
+return ret;
+}
+
+static int ipath_sd_trimself(struct ipath_devdata *dd, int val)
+{
+	return ibsd_sto_noisy(dd, CMUCTRL5, val, 0xFF);
+}
+
+static int ipath_sd_early(struct ipath_devdata *dd)
+{
+	int ret = -1; /* Default failed */
+	int chnl;
+
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, RXHSCTRL0(chnl), 0xD4, 0xFF);
+		if (ret < 0)
+			goto bail;
+	}
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, VCDL_DAC2(chnl), 0x2D, 0xFF);
+		if (ret < 0)
+			goto bail;
+	}
+	/* more fine-tuning of what will be default */
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, VCDL_CTRL2(chnl), 3, 0xF);
+		if (ret < 0)
+			goto bail;
+	}
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, START_EQ1(chnl), 0x10, 0xFF);
+		if (ret < 0)
+			goto bail;
+	}
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, START_EQ2(chnl), 0x30, 0xFF);
+		if (ret < 0)
+			goto bail;
+	}
+bail:
+	return ret;
+}
+
+#define BACTRL(chnl) EPB_LOC(chnl, 6, 0x0E)
+#define LDOUTCTRL1(chnl) EPB_LOC(chnl, 7, 6)
+#define RXHSSTATUS(chnl) EPB_LOC(chnl, 6, 0xF)
+
+static int ipath_sd_dactrim(struct ipath_devdata *dd)
+{
+	int ret = -1; /* Default failed */
+	int chnl;
+
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, BACTRL(chnl), 0x40, 0xFF);
+		if (ret < 0)
+			goto bail;
+	}
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x04, 0xFF);
+		if (ret < 0)
+			goto bail;
+	}
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, RXHSSTATUS(chnl), 0x04, 0xFF);
+		if (ret < 0)
+			goto bail;
+	}
+	/*
+	 * delay for max possible number of steps, with slop.
+	 * Each step is about 4usec.
+	 */
+	udelay(415);
+	for (chnl = 0; chnl < 4; ++chnl) {
+		ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x00, 0xFF);
+		if (ret < 0)
+			goto bail;
+	}
+bail:
+	return ret;
+}
+
+#define RELOCK_FIRST_MS 3
+#define RXLSPPM(chan) EPB_LOC(chan, 0, 2)
+void ipath_toggle_rclkrls(struct ipath_devdata *dd)
+{
+	int loc = RXLSPPM(0) | EPB_GLOBAL_WR;
+	int ret;
+
+	ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
+	if (ret < 0)
+		ipath_dev_err(dd, "RCLKRLS failed to clear D7\n");
+	else {
+		udelay(1);
+		ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+	}
+	/* And again for good measure */
+	udelay(1);
+	ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
+	if (ret < 0)
+		ipath_dev_err(dd, "RCLKRLS failed to clear D7\n");
+	else {
+		udelay(1);
+		ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+	}
+	/* Now reset xgxs and IBC to complete the recovery */
+	dd->ipath_f_xgxs_reset(dd);
+}
+
+/*
+ * Shut down the timer that polls for relock occasions, if needed
+ * this is "hooked" from ipath_7220_quiet_serdes(), which is called
+ * just before ipath_shutdown_device() in ipath_driver.c shuts down all
+ * the other timers
+ */
+void ipath_shutdown_relock_poll(struct ipath_devdata *dd)
+{
+	struct ipath_relock *irp = &dd->ipath_relock_singleton;
+	if (atomic_read(&irp->ipath_relock_timer_active)) {
+		del_timer_sync(&irp->ipath_relock_timer);
+		atomic_set(&irp->ipath_relock_timer_active, 0);
+	}
+}
+
+static unsigned ipath_relock_by_timer = 1;
+module_param_named(relock_by_timer, ipath_relock_by_timer, uint,
+	S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up");
+
+static void ipath_run_relock(unsigned long opaque)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+	struct ipath_relock *irp = &dd->ipath_relock_singleton;
+	u64 val, ltstate;
+
+	if (!(dd->ipath_flags & IPATH_INITTED)) {
+		/* Not yet up, just reenable the timer for later */
+		irp->ipath_relock_interval = HZ;
+		mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
+		return;
+	}
+
+	/*
+	 * Check link-training state for "stuck" state.
+	 * if found, try relock and schedule another try at
+	 * exponentially growing delay, maxed at one second.
+	 * if not stuck, our work is done.
+	 */
+	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+	ltstate = ipath_ib_linktrstate(dd, val);
+
+	if (ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT
+		&& ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+		int timeoff;
+		/* Not up yet. Try again, if allowed by module-param */
+		if (ipath_relock_by_timer) {
+			if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
+				ipath_cdbg(VERBOSE, "Skip RELOCK in AUTONEG\n");
+			else if (!(dd->ipath_flags & IPATH_IB_LINK_DISABLED)) {
+				ipath_cdbg(VERBOSE, "RELOCK\n");
+				ipath_toggle_rclkrls(dd);
+			}
+		}
+		/* re-set timer for next check */
+		timeoff = irp->ipath_relock_interval << 1;
+		if (timeoff > HZ)
+			timeoff = HZ;
+		irp->ipath_relock_interval = timeoff;
+
+		mod_timer(&irp->ipath_relock_timer, jiffies + timeoff);
+	} else {
+		/* Up, so no more need to check so often */
+		mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
+	}
+}
+
+void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup)
+{
+	struct ipath_relock *irp = &dd->ipath_relock_singleton;
+
+	if (ibup > 0) {
+		/* we are now up, so relax timer to 1 second interval */
+		if (atomic_read(&irp->ipath_relock_timer_active))
+			mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
+	} else {
+		/* Transition to down, (re-)set timer to short interval. */
+		int timeout;
+		timeout = (HZ * ((ibup == -1) ? 1000 : RELOCK_FIRST_MS))/1000;
+		if (timeout == 0)
+			timeout = 1;
+		/* If timer has not yet been started, do so. */
+		if (atomic_inc_return(&irp->ipath_relock_timer_active) == 1) {
+			init_timer(&irp->ipath_relock_timer);
+			irp->ipath_relock_timer.function = ipath_run_relock;
+			irp->ipath_relock_timer.data = (unsigned long) dd;
+			irp->ipath_relock_interval = timeout;
+			irp->ipath_relock_timer.expires = jiffies + timeout;
+			add_timer(&irp->ipath_relock_timer);
+		} else {
+			irp->ipath_relock_interval = timeout;
+			mod_timer(&irp->ipath_relock_timer, jiffies + timeout);
+			atomic_dec(&irp->ipath_relock_timer_active);
+		}
+	}
+}
+
diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220_img.c b/drivers/infiniband/hw/ipath/ipath_sd7220_img.c
new file mode 100644
index 0000000..5ef59da
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sd7220_img.c
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains the memory image from the vendor, to be copied into
+ * the IB SERDES of the IBA7220 during initialization.
+ * The file also includes the two functions which use this image.
+ */
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+#include "ipath_7220.h"
+
+static unsigned char ipath_sd7220_ib_img[] = {
+/*0000*/0x02, 0x0A, 0x29, 0x02, 0x0A, 0x87, 0xE5, 0xE6,
+	0x30, 0xE6, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
+/*0010*/0x00, 0xE5, 0xE2, 0x30, 0xE4, 0x04, 0x7E, 0x01,
+	0x80, 0x02, 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x08,
+/*0020*/0x53, 0xF9, 0xF7, 0xE4, 0xF5, 0xFE, 0x80, 0x08,
+	0x7F, 0x0A, 0x12, 0x17, 0x31, 0x12, 0x0E, 0xA2,
+/*0030*/0x75, 0xFC, 0x08, 0xE4, 0xF5, 0xFD, 0xE5, 0xE7,
+	0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0x22, 0x00,
+/*0040*/0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x75,
+	0x51, 0x01, 0xE4, 0xF5, 0x52, 0xF5, 0x53, 0xF5,
+/*0050*/0x52, 0xF5, 0x7E, 0x7F, 0x04, 0x02, 0x04, 0x38,
+	0xC2, 0x36, 0x05, 0x52, 0xE5, 0x52, 0xD3, 0x94,
+/*0060*/0x0C, 0x40, 0x05, 0x75, 0x52, 0x01, 0xD2, 0x36,
+	0x90, 0x07, 0x0C, 0x74, 0x07, 0xF0, 0xA3, 0x74,
+/*0070*/0xFF, 0xF0, 0xE4, 0xF5, 0x0C, 0xA3, 0xF0, 0x90,
+	0x07, 0x14, 0xF0, 0xA3, 0xF0, 0x75, 0x0B, 0x20,
+/*0080*/0xF5, 0x09, 0xE4, 0xF5, 0x08, 0xE5, 0x08, 0xD3,
+	0x94, 0x30, 0x40, 0x03, 0x02, 0x04, 0x04, 0x12,
+/*0090*/0x00, 0x06, 0x15, 0x0B, 0xE5, 0x08, 0x70, 0x04,
+	0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x09,
+/*00A0*/0x70, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00,
+	0xEE, 0x5F, 0x60, 0x05, 0x12, 0x18, 0x71, 0xD2,
+/*00B0*/0x35, 0x53, 0xE1, 0xF7, 0xE5, 0x08, 0x45, 0x09,
+	0xFF, 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24,
+/*00C0*/0x83, 0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83,
+	0xEF, 0xF0, 0x85, 0xE2, 0x20, 0xE5, 0x52, 0xD3,
+/*00D0*/0x94, 0x01, 0x40, 0x0D, 0x12, 0x19, 0xF3, 0xE0,
+	0x54, 0xA0, 0x64, 0x40, 0x70, 0x03, 0x02, 0x03,
+/*00E0*/0xFB, 0x53, 0xF9, 0xF8, 0x90, 0x94, 0x70, 0xE4,
+	0xF0, 0xE0, 0xF5, 0x10, 0xAF, 0x09, 0x12, 0x1E,
+/*00F0*/0xB3, 0xAF, 0x08, 0xEF, 0x44, 0x08, 0xF5, 0x82,
+	0x75, 0x83, 0x80, 0xE0, 0xF5, 0x29, 0xEF, 0x44,
+/*0100*/0x07, 0x12, 0x1A, 0x3C, 0xF5, 0x22, 0x54, 0x40,
+	0xD3, 0x94, 0x00, 0x40, 0x1E, 0xE5, 0x29, 0x54,
+/*0110*/0xF0, 0x70, 0x21, 0x12, 0x19, 0xF3, 0xE0, 0x44,
+	0x80, 0xF0, 0xE5, 0x22, 0x54, 0x30, 0x65, 0x08,
+/*0120*/0x70, 0x09, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xBF,
+	0xF0, 0x80, 0x09, 0x12, 0x19, 0xF3, 0x74, 0x40,
+/*0130*/0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12, 0x75,
+	0x83, 0xAE, 0x74, 0xFF, 0xF0, 0xAF, 0x08, 0x7E,
+/*0140*/0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0xE0, 0xFD,
+	0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24, 0x81,
+/*0150*/0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83, 0xED,
+	0xF0, 0x90, 0x07, 0x0E, 0xE0, 0x04, 0xF0, 0xEF,
+/*0160*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0x98, 0xE0,
+	0xF5, 0x28, 0x12, 0x1A, 0x23, 0x40, 0x0C, 0x12,
+/*0170*/0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12, 0x1A, 0x32,
+	0x02, 0x03, 0xF6, 0xAF, 0x08, 0x7E, 0x00, 0x74,
+/*0180*/0x80, 0xCD, 0xEF, 0xCD, 0x8D, 0x82, 0xF5, 0x83,
+	0xE0, 0x30, 0xE0, 0x0A, 0x12, 0x19, 0xF3, 0xE0,
+/*0190*/0x44, 0x20, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x19,
+	0xF3, 0xE0, 0x54, 0xDF, 0xF0, 0xEE, 0x44, 0xAE,
+/*01A0*/0x12, 0x1A, 0x43, 0x30, 0xE4, 0x03, 0x02, 0x03,
+	0xFB, 0x74, 0x9E, 0x12, 0x1A, 0x05, 0x20, 0xE0,
+/*01B0*/0x03, 0x02, 0x03, 0xFB, 0x8F, 0x82, 0x8E, 0x83,
+	0xE0, 0x20, 0xE0, 0x03, 0x02, 0x03, 0xFB, 0x12,
+/*01C0*/0x19, 0xF3, 0xE0, 0x44, 0x10, 0xF0, 0xE5, 0xE3,
+	0x20, 0xE7, 0x08, 0xE5, 0x08, 0x12, 0x1A, 0x3A,
+/*01D0*/0x44, 0x04, 0xF0, 0xAF, 0x08, 0x7E, 0x00, 0xEF,
+	0x12, 0x1A, 0x3A, 0x20, 0xE2, 0x34, 0x12, 0x19,
+/*01E0*/0xF3, 0xE0, 0x44, 0x08, 0xF0, 0xE5, 0xE4, 0x30,
+	0xE6, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
+/*01F0*/0xE5, 0x7E, 0xC3, 0x94, 0x04, 0x50, 0x04, 0x7C,
+	0x01, 0x80, 0x02, 0x7C, 0x00, 0xEC, 0x4D, 0x60,
+/*0200*/0x05, 0xC2, 0x35, 0x02, 0x03, 0xFB, 0xEE, 0x44,
+	0xD2, 0x12, 0x1A, 0x43, 0x44, 0x40, 0xF0, 0x02,
+/*0210*/0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xF7,
+	0xF0, 0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0,
+/*0220*/0x54, 0xBF, 0xF0, 0x90, 0x07, 0x14, 0xE0, 0x04,
+	0xF0, 0xE5, 0x7E, 0x70, 0x03, 0x75, 0x7E, 0x01,
+/*0230*/0xAF, 0x08, 0x7E, 0x00, 0x12, 0x1A, 0x23, 0x40,
+	0x12, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12,
+/*0240*/0x19, 0xF2, 0xE0, 0x54, 0x02, 0x12, 0x1A, 0x32,
+	0x02, 0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x44,
+/*0250*/0x02, 0x12, 0x19, 0xF2, 0xE0, 0x54, 0xFE, 0xF0,
+	0xC2, 0x35, 0xEE, 0x44, 0x8A, 0x8F, 0x82, 0xF5,
+/*0260*/0x83, 0xE0, 0xF5, 0x17, 0x54, 0x8F, 0x44, 0x40,
+	0xF0, 0x74, 0x90, 0xFC, 0xE5, 0x08, 0x44, 0x07,
+/*0270*/0xFD, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0x54, 0x3F,
+	0x90, 0x07, 0x02, 0xF0, 0xE0, 0x54, 0xC0, 0x8D,
+/*0280*/0x82, 0x8C, 0x83, 0xF0, 0x74, 0x92, 0x12, 0x1A,
+	0x05, 0x90, 0x07, 0x03, 0x12, 0x1A, 0x19, 0x74,
+/*0290*/0x82, 0x12, 0x1A, 0x05, 0x90, 0x07, 0x04, 0x12,
+	0x1A, 0x19, 0x74, 0xB4, 0x12, 0x1A, 0x05, 0x90,
+/*02A0*/0x07, 0x05, 0x12, 0x1A, 0x19, 0x74, 0x94, 0xFE,
+	0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A, 0xF5,
+/*02B0*/0x10, 0x30, 0xE0, 0x04, 0xD2, 0x37, 0x80, 0x02,
+	0xC2, 0x37, 0xE5, 0x10, 0x54, 0x7F, 0x8F, 0x82,
+/*02C0*/0x8E, 0x83, 0xF0, 0x30, 0x44, 0x30, 0x12, 0x1A,
+	0x03, 0x54, 0x80, 0xD3, 0x94, 0x00, 0x40, 0x04,
+/*02D0*/0xD2, 0x39, 0x80, 0x02, 0xC2, 0x39, 0x8F, 0x82,
+	0x8E, 0x83, 0xE0, 0x44, 0x80, 0xF0, 0x12, 0x1A,
+/*02E0*/0x03, 0x54, 0x40, 0xD3, 0x94, 0x00, 0x40, 0x04,
+	0xD2, 0x3A, 0x80, 0x02, 0xC2, 0x3A, 0x8F, 0x82,
+/*02F0*/0x8E, 0x83, 0xE0, 0x44, 0x40, 0xF0, 0x74, 0x92,
+	0xFE, 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A,
+/*0300*/0x30, 0xE7, 0x04, 0xD2, 0x38, 0x80, 0x02, 0xC2,
+	0x38, 0x8F, 0x82, 0x8E, 0x83, 0xE0, 0x54, 0x7F,
+/*0310*/0xF0, 0x12, 0x1E, 0x46, 0xE4, 0xF5, 0x0A, 0x20,
+	0x03, 0x02, 0x80, 0x03, 0x30, 0x43, 0x03, 0x12,
+/*0320*/0x19, 0x95, 0x20, 0x02, 0x02, 0x80, 0x03, 0x30,
+	0x42, 0x03, 0x12, 0x0C, 0x8F, 0x30, 0x30, 0x06,
+/*0330*/0x12, 0x19, 0x95, 0x12, 0x0C, 0x8F, 0x12, 0x0D,
+	0x47, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xFB, 0xF0,
+/*0340*/0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40, 0x46, 0x43,
+	0xE1, 0x08, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x04,
+/*0350*/0xF0, 0xE5, 0xE4, 0x20, 0xE7, 0x2A, 0x12, 0x1A,
+	0x12, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
+/*0360*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+	0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
+/*0370*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
+	0x5E, 0x60, 0x05, 0x12, 0x1D, 0xD7, 0x80, 0x17,
+/*0380*/0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x44,
+	0x08, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12,
+/*0390*/0x75, 0x83, 0xD2, 0xE0, 0x54, 0xF7, 0xF0, 0x12,
+	0x1E, 0x46, 0x7F, 0x08, 0x12, 0x17, 0x31, 0x74,
+/*03A0*/0x8E, 0xFE, 0x12, 0x1A, 0x12, 0x8E, 0x83, 0xE0,
+	0xF5, 0x10, 0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44,
+/*03B0*/0x01, 0xFF, 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07,
+	0xF5, 0x82, 0xEF, 0xF0, 0xE5, 0x10, 0x54, 0xFE,
+/*03C0*/0xFF, 0xED, 0x44, 0x07, 0xF5, 0x82, 0xEF, 0x12,
+	0x1A, 0x11, 0x75, 0x83, 0x86, 0xE0, 0x44, 0x10,
+/*03D0*/0x12, 0x1A, 0x11, 0xE0, 0x44, 0x10, 0xF0, 0x12,
+	0x19, 0xF3, 0xE0, 0x54, 0xFD, 0x44, 0x01, 0xFF,
+/*03E0*/0x12, 0x19, 0xF3, 0xEF, 0x12, 0x1A, 0x32, 0x30,
+	0x32, 0x0C, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
+/*03F0*/0x75, 0x83, 0x82, 0x74, 0x05, 0xF0, 0xAF, 0x0B,
+	0x12, 0x18, 0xD7, 0x74, 0x10, 0x25, 0x08, 0xF5,
+/*0400*/0x08, 0x02, 0x00, 0x85, 0x05, 0x09, 0xE5, 0x09,
+	0xD3, 0x94, 0x07, 0x50, 0x03, 0x02, 0x00, 0x82,
+/*0410*/0xE5, 0x7E, 0xD3, 0x94, 0x00, 0x40, 0x04, 0x7F,
+	0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x7E, 0xC3,
+/*0420*/0x94, 0xFA, 0x50, 0x04, 0x7E, 0x01, 0x80, 0x02,
+	0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x02, 0x05, 0x7E,
+/*0430*/0x30, 0x35, 0x0B, 0x43, 0xE1, 0x01, 0x7F, 0x09,
+	0x12, 0x17, 0x31, 0x02, 0x00, 0x58, 0x53, 0xE1,
+/*0440*/0xFE, 0x02, 0x00, 0x58, 0x8E, 0x6A, 0x8F, 0x6B,
+	0x8C, 0x6C, 0x8D, 0x6D, 0x75, 0x6E, 0x01, 0x75,
+/*0450*/0x6F, 0x01, 0x75, 0x70, 0x01, 0xE4, 0xF5, 0x73,
+	0xF5, 0x74, 0xF5, 0x75, 0x90, 0x07, 0x2F, 0xF0,
+/*0460*/0xF5, 0x3C, 0xF5, 0x3E, 0xF5, 0x46, 0xF5, 0x47,
+	0xF5, 0x3D, 0xF5, 0x3F, 0xF5, 0x6F, 0xE5, 0x6F,
+/*0470*/0x70, 0x0F, 0xE5, 0x6B, 0x45, 0x6A, 0x12, 0x07,
+	0x2A, 0x75, 0x83, 0x80, 0x74, 0x3A, 0xF0, 0x80,
+/*0480*/0x09, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74,
+	0x1A, 0xF0, 0xE4, 0xF5, 0x6E, 0xC3, 0x74, 0x3F,
+/*0490*/0x95, 0x6E, 0xFF, 0x12, 0x08, 0x65, 0x75, 0x83,
+	0x82, 0xEF, 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x08,
+/*04A0*/0xC6, 0xE5, 0x33, 0xF0, 0x12, 0x08, 0xFA, 0x12,
+	0x08, 0xB1, 0x40, 0xE1, 0xE5, 0x6F, 0x70, 0x0B,
+/*04B0*/0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74, 0x36,
+	0xF0, 0x80, 0x09, 0x12, 0x07, 0x2A, 0x75, 0x83,
+/*04C0*/0x80, 0x74, 0x16, 0xF0, 0x75, 0x6E, 0x01, 0x12,
+	0x07, 0x2A, 0x75, 0x83, 0xB4, 0xE5, 0x6E, 0xF0,
+/*04D0*/0x12, 0x1A, 0x4D, 0x74, 0x3F, 0x25, 0x6E, 0xF5,
+	0x82, 0xE4, 0x34, 0x00, 0xF5, 0x83, 0xE5, 0x33,
+/*04E0*/0xF0, 0x74, 0xBF, 0x25, 0x6E, 0xF5, 0x82, 0xE4,
+	0x34, 0x00, 0x12, 0x08, 0xB1, 0x40, 0xD8, 0xE4,
+/*04F0*/0xF5, 0x70, 0xF5, 0x46, 0xF5, 0x47, 0xF5, 0x6E,
+	0x12, 0x08, 0xFA, 0xF5, 0x83, 0xE0, 0xFE, 0x12,
+/*0500*/0x08, 0xC6, 0xE0, 0x7C, 0x00, 0x24, 0x00, 0xFF,
+	0xEC, 0x3E, 0xFE, 0xAD, 0x3B, 0xD3, 0xEF, 0x9D,
+/*0510*/0xEE, 0x9C, 0x50, 0x04, 0x7B, 0x01, 0x80, 0x02,
+	0x7B, 0x00, 0xE5, 0x70, 0x70, 0x04, 0x7A, 0x01,
+/*0520*/0x80, 0x02, 0x7A, 0x00, 0xEB, 0x5A, 0x60, 0x06,
+	0x85, 0x6E, 0x46, 0x75, 0x70, 0x01, 0xD3, 0xEF,
+/*0530*/0x9D, 0xEE, 0x9C, 0x50, 0x04, 0x7F, 0x01, 0x80,
+	0x02, 0x7F, 0x00, 0xE5, 0x70, 0xB4, 0x01, 0x04,
+/*0540*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF, 0x5E,
+	0x60, 0x03, 0x85, 0x6E, 0x47, 0x05, 0x6E, 0xE5,
+/*0550*/0x6E, 0x64, 0x7F, 0x70, 0xA3, 0xE5, 0x46, 0x60,
+	0x05, 0xE5, 0x47, 0xB4, 0x7E, 0x03, 0x85, 0x46,
+/*0560*/0x47, 0xE5, 0x6F, 0x70, 0x08, 0x85, 0x46, 0x76,
+	0x85, 0x47, 0x77, 0x80, 0x0E, 0xC3, 0x74, 0x7F,
+/*0570*/0x95, 0x46, 0xF5, 0x78, 0xC3, 0x74, 0x7F, 0x95,
+	0x47, 0xF5, 0x79, 0xE5, 0x6F, 0x70, 0x37, 0xE5,
+/*0580*/0x46, 0x65, 0x47, 0x70, 0x0C, 0x75, 0x73, 0x01,
+	0x75, 0x74, 0x01, 0xF5, 0x3C, 0xF5, 0x3D, 0x80,
+/*0590*/0x35, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5, 0x47, 0x95,
+	0x46, 0xF5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
+/*05A0*/0x46, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
+	0xE4, 0xF5, 0x3D, 0x80, 0x40, 0xC3, 0x74, 0x3F,
+/*05B0*/0x95, 0x72, 0xF5, 0x3D, 0x80, 0x37, 0xE5, 0x46,
+	0x65, 0x47, 0x70, 0x0F, 0x75, 0x73, 0x01, 0x75,
+/*05C0*/0x75, 0x01, 0xF5, 0x3E, 0xF5, 0x3F, 0x75, 0x4E,
+	0x01, 0x80, 0x22, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5,
+/*05D0*/0x47, 0x95, 0x46, 0xF5, 0x3E, 0xC3, 0x13, 0xF5,
+	0x71, 0x25, 0x46, 0xF5, 0x72, 0xD3, 0x94, 0x3F,
+/*05E0*/0x50, 0x05, 0xE4, 0xF5, 0x3F, 0x80, 0x06, 0xE5,
+	0x72, 0x24, 0xC1, 0xF5, 0x3F, 0x05, 0x6F, 0xE5,
+/*05F0*/0x6F, 0xC3, 0x94, 0x02, 0x50, 0x03, 0x02, 0x04,
+	0x6E, 0xE5, 0x6D, 0x45, 0x6C, 0x70, 0x02, 0x80,
+/*0600*/0x04, 0xE5, 0x74, 0x45, 0x75, 0x90, 0x07, 0x2F,
+	0xF0, 0x7F, 0x01, 0xE5, 0x3E, 0x60, 0x04, 0xE5,
+/*0610*/0x3C, 0x70, 0x14, 0xE4, 0xF5, 0x3C, 0xF5, 0x3D,
+	0xF5, 0x3E, 0xF5, 0x3F, 0x12, 0x08, 0xD2, 0x70,
+/*0620*/0x04, 0xF0, 0x02, 0x06, 0xA4, 0x80, 0x7A, 0xE5,
+	0x3C, 0xC3, 0x95, 0x3E, 0x40, 0x07, 0xE5, 0x3C,
+/*0630*/0x95, 0x3E, 0xFF, 0x80, 0x06, 0xC3, 0xE5, 0x3E,
+	0x95, 0x3C, 0xFF, 0xE5, 0x76, 0xD3, 0x95, 0x79,
+/*0640*/0x40, 0x05, 0x85, 0x76, 0x7A, 0x80, 0x03, 0x85,
+	0x79, 0x7A, 0xE5, 0x77, 0xC3, 0x95, 0x78, 0x50,
+/*0650*/0x05, 0x85, 0x77, 0x7B, 0x80, 0x03, 0x85, 0x78,
+	0x7B, 0xE5, 0x7B, 0xD3, 0x95, 0x7A, 0x40, 0x30,
+/*0660*/0xE5, 0x7B, 0x95, 0x7A, 0xF5, 0x3C, 0xF5, 0x3E,
+	0xC3, 0xE5, 0x7B, 0x95, 0x7A, 0x90, 0x07, 0x19,
+/*0670*/0xF0, 0xE5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
+	0x7A, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
+/*0680*/0xE4, 0xF5, 0x3D, 0x80, 0x1F, 0xC3, 0x74, 0x3F,
+	0x95, 0x72, 0xF5, 0x3D, 0xF5, 0x3F, 0x80, 0x14,
+/*0690*/0xE4, 0xF5, 0x3C, 0xF5, 0x3E, 0x90, 0x07, 0x19,
+	0xF0, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
+/*06A0*/0x03, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x65, 0x75,
+	0x83, 0xD0, 0xE0, 0x54, 0x0F, 0xFE, 0xAD, 0x3C,
+/*06B0*/0x70, 0x02, 0x7E, 0x07, 0xBE, 0x0F, 0x02, 0x7E,
+	0x80, 0xEE, 0xFB, 0xEF, 0xD3, 0x9B, 0x74, 0x80,
+/*06C0*/0xF8, 0x98, 0x40, 0x1F, 0xE4, 0xF5, 0x3C, 0xF5,
+	0x3E, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
+/*06D0*/0x12, 0x74, 0x01, 0xF0, 0xE5, 0x08, 0xFB, 0xEB,
+	0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xD2, 0xE0,
+/*06E0*/0x44, 0x10, 0xF0, 0xE5, 0x08, 0xFB, 0xEB, 0x44,
+	0x09, 0xF5, 0x82, 0x75, 0x83, 0x9E, 0xED, 0xF0,
+/*06F0*/0xEB, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xCA,
+	0xED, 0xF0, 0x12, 0x08, 0x65, 0x75, 0x83, 0xCC,
+/*0700*/0xEF, 0xF0, 0x22, 0xE5, 0x08, 0x44, 0x07, 0xF5,
+	0x82, 0x75, 0x83, 0xBC, 0xE0, 0x54, 0xF0, 0xF0,
+/*0710*/0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83,
+	0xBE, 0xE0, 0x54, 0xF0, 0xF0, 0xE5, 0x08, 0x44,
+/*0720*/0x07, 0xF5, 0x82, 0x75, 0x83, 0xC0, 0xE0, 0x54,
+	0xF0, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
+/*0730*/0x22, 0xF0, 0x90, 0x07, 0x28, 0xE0, 0xFE, 0xA3,
+	0xE0, 0xF5, 0x82, 0x8E, 0x83, 0x22, 0x85, 0x42,
+/*0740*/0x42, 0x85, 0x41, 0x41, 0x85, 0x40, 0x40, 0x74,
+	0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E, 0xF5,
+/*0750*/0x83, 0xE5, 0x42, 0xF0, 0x74, 0xE0, 0x2F, 0xF5,
+	0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xE5,
+/*0760*/0x42, 0x29, 0xFD, 0xE4, 0x33, 0xFC, 0xE5, 0x3C,
+	0xC3, 0x9D, 0xEC, 0x64, 0x80, 0xF8, 0x74, 0x80,
+/*0770*/0x98, 0x22, 0xF5, 0x83, 0xE0, 0x90, 0x07, 0x22,
+	0x54, 0x1F, 0xFD, 0xE0, 0xFA, 0xA3, 0xE0, 0xF5,
+/*0780*/0x82, 0x8A, 0x83, 0xED, 0xF0, 0x22, 0x90, 0x07,
+	0x22, 0xE0, 0xFC, 0xA3, 0xE0, 0xF5, 0x82, 0x8C,
+/*0790*/0x83, 0x22, 0x90, 0x07, 0x24, 0xFF, 0xED, 0x44,
+	0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22, 0x85,
+/*07A0*/0x38, 0x38, 0x85, 0x39, 0x39, 0x85, 0x3A, 0x3A,
+	0x74, 0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E,
+/*07B0*/0xF5, 0x83, 0x22, 0x90, 0x07, 0x26, 0xFF, 0xED,
+	0x44, 0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22,
+/*07C0*/0xF0, 0x74, 0xA0, 0x2F, 0xF5, 0x82, 0x74, 0x02,
+	0x3E, 0xF5, 0x83, 0x22, 0x74, 0xC0, 0x25, 0x11,
+/*07D0*/0xF5, 0x82, 0xE4, 0x34, 0x01, 0xF5, 0x83, 0x22,
+	0x74, 0x00, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+/*07E0*/0x02, 0xF5, 0x83, 0x22, 0x74, 0x60, 0x25, 0x11,
+	0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
+/*07F0*/0x74, 0x80, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+	0x03, 0xF5, 0x83, 0x22, 0x74, 0xE0, 0x25, 0x11,
+/*0800*/0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
+	0x74, 0x40, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+/*0810*/0x06, 0xF5, 0x83, 0x22, 0x74, 0x80, 0x2F, 0xF5,
+	0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xAF,
+/*0820*/0x08, 0x7E, 0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+	0x22, 0xF5, 0x83, 0xE5, 0x82, 0x44, 0x07, 0xF5,
+/*0830*/0x82, 0xE5, 0x40, 0xF0, 0x22, 0x74, 0x40, 0x25,
+	0x11, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
+/*0840*/0x22, 0x74, 0xC0, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+	0x34, 0x03, 0xF5, 0x83, 0x22, 0x74, 0x00, 0x25,
+/*0850*/0x11, 0xF5, 0x82, 0xE4, 0x34, 0x06, 0xF5, 0x83,
+	0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+/*0860*/0x34, 0x06, 0xF5, 0x83, 0x22, 0xE5, 0x08, 0xFD,
+	0xED, 0x44, 0x07, 0xF5, 0x82, 0x22, 0xE5, 0x41,
+/*0870*/0xF0, 0xE5, 0x65, 0x64, 0x01, 0x45, 0x64, 0x22,
+	0x7E, 0x00, 0xFB, 0x7A, 0x00, 0xFD, 0x7C, 0x00,
+/*0880*/0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+	0x34, 0x02, 0x22, 0x74, 0xA0, 0x25, 0x11, 0xF5,
+/*0890*/0x82, 0xE4, 0x34, 0x03, 0x22, 0x85, 0x3E, 0x42,
+	0x85, 0x3F, 0x41, 0x8F, 0x40, 0x22, 0x85, 0x3C,
+/*08A0*/0x42, 0x85, 0x3D, 0x41, 0x8F, 0x40, 0x22, 0x75,
+	0x45, 0x3F, 0x90, 0x07, 0x20, 0xE4, 0xF0, 0xA3,
+/*08B0*/0x22, 0xF5, 0x83, 0xE5, 0x32, 0xF0, 0x05, 0x6E,
+	0xE5, 0x6E, 0xC3, 0x94, 0x40, 0x22, 0xF0, 0xE5,
+/*08C0*/0x08, 0x44, 0x06, 0xF5, 0x82, 0x22, 0x74, 0x00,
+	0x25, 0x6E, 0xF5, 0x82, 0xE4, 0x34, 0x00, 0xF5,
+/*08D0*/0x83, 0x22, 0xE5, 0x6D, 0x45, 0x6C, 0x90, 0x07,
+	0x2F, 0x22, 0xE4, 0xF9, 0xE5, 0x3C, 0xD3, 0x95,
+/*08E0*/0x3E, 0x22, 0x74, 0x80, 0x2E, 0xF5, 0x82, 0xE4,
+	0x34, 0x02, 0xF5, 0x83, 0xE0, 0x22, 0x74, 0xA0,
+/*08F0*/0x2E, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
+	0xE0, 0x22, 0x74, 0x80, 0x25, 0x6E, 0xF5, 0x82,
+/*0900*/0xE4, 0x34, 0x00, 0x22, 0x25, 0x42, 0xFD, 0xE4,
+	0x33, 0xFC, 0x22, 0x85, 0x42, 0x42, 0x85, 0x41,
+/*0910*/0x41, 0x85, 0x40, 0x40, 0x22, 0xED, 0x4C, 0x60,
+	0x03, 0x02, 0x09, 0xE5, 0xEF, 0x4E, 0x70, 0x37,
+/*0920*/0x90, 0x07, 0x26, 0x12, 0x07, 0x89, 0xE0, 0xFD,
+	0x12, 0x07, 0xCC, 0xED, 0xF0, 0x90, 0x07, 0x28,
+/*0930*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xD8,
+	0xED, 0xF0, 0x12, 0x07, 0x86, 0xE0, 0x54, 0x1F,
+/*0940*/0xFD, 0x12, 0x08, 0x81, 0xF5, 0x83, 0xED, 0xF0,
+	0x90, 0x07, 0x24, 0x12, 0x07, 0x89, 0xE0, 0x54,
+/*0950*/0x1F, 0xFD, 0x12, 0x08, 0x35, 0xED, 0xF0, 0xEF,
+	0x64, 0x04, 0x4E, 0x70, 0x37, 0x90, 0x07, 0x26,
+/*0960*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xE4,
+	0xED, 0xF0, 0x90, 0x07, 0x28, 0x12, 0x07, 0x89,
+/*0970*/0xE0, 0xFD, 0x12, 0x07, 0xF0, 0xED, 0xF0, 0x12,
+	0x07, 0x86, 0xE0, 0x54, 0x1F, 0xFD, 0x12, 0x08,
+/*0980*/0x8B, 0xF5, 0x83, 0xED, 0xF0, 0x90, 0x07, 0x24,
+	0x12, 0x07, 0x89, 0xE0, 0x54, 0x1F, 0xFD, 0x12,
+/*0990*/0x08, 0x41, 0xED, 0xF0, 0xEF, 0x64, 0x01, 0x4E,
+	0x70, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
+/*09A0*/0xEF, 0x64, 0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01,
+	0x80, 0x02, 0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x78,
+/*09B0*/0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE0, 0xFF,
+	0x12, 0x07, 0xFC, 0xEF, 0x12, 0x07, 0x31, 0xE0,
+/*09C0*/0xFF, 0x12, 0x08, 0x08, 0xEF, 0xF0, 0x90, 0x07,
+	0x22, 0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF,
+/*09D0*/0x12, 0x08, 0x4D, 0xEF, 0xF0, 0x90, 0x07, 0x24,
+	0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
+/*09E0*/0x08, 0x59, 0xEF, 0xF0, 0x22, 0x12, 0x07, 0xCC,
+	0xE4, 0xF0, 0x12, 0x07, 0xD8, 0xE4, 0xF0, 0x12,
+/*09F0*/0x08, 0x81, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08,
+	0x35, 0x74, 0x14, 0xF0, 0x12, 0x07, 0xE4, 0xE4,
+/*0A00*/0xF0, 0x12, 0x07, 0xF0, 0xE4, 0xF0, 0x12, 0x08,
+	0x8B, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08, 0x41,
+/*0A10*/0x74, 0x14, 0xF0, 0x12, 0x07, 0xFC, 0xE4, 0xF0,
+	0x12, 0x08, 0x08, 0xE4, 0xF0, 0x12, 0x08, 0x4D,
+/*0A20*/0xE4, 0xF0, 0x12, 0x08, 0x59, 0x74, 0x14, 0xF0,
+	0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFC, 0x10, 0xE4,
+/*0A30*/0xF5, 0xFD, 0x75, 0xFE, 0x30, 0xF5, 0xFF, 0xE5,
+	0xE7, 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0xE5,
+/*0A40*/0xE6, 0x20, 0xE7, 0x0B, 0x78, 0xFF, 0xE4, 0xF6,
+	0xD8, 0xFD, 0x53, 0xE6, 0xFE, 0x80, 0x09, 0x78,
+/*0A50*/0x08, 0xE4, 0xF6, 0xD8, 0xFD, 0x53, 0xE6, 0xFE,
+	0x75, 0x81, 0x80, 0xE4, 0xF5, 0xA8, 0xD2, 0xA8,
+/*0A60*/0xC2, 0xA9, 0xD2, 0xAF, 0xE5, 0xE2, 0x20, 0xE5,
+	0x05, 0x20, 0xE6, 0x02, 0x80, 0x03, 0x43, 0xE1,
+/*0A70*/0x02, 0xE5, 0xE2, 0x20, 0xE0, 0x0E, 0x90, 0x00,
+	0x00, 0x7F, 0x00, 0x7E, 0x08, 0xE4, 0xF0, 0xA3,
+/*0A80*/0xDF, 0xFC, 0xDE, 0xFA, 0x02, 0x0A, 0xDB, 0x43,
+	0xFA, 0x01, 0xC0, 0xE0, 0xC0, 0xF0, 0xC0, 0x83,
+/*0A90*/0xC0, 0x82, 0xC0, 0xD0, 0x12, 0x1C, 0xE7, 0xD0,
+	0xD0, 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0xF0, 0xD0,
+/*0AA0*/0xE0, 0x53, 0xFA, 0xFE, 0x32, 0x02, 0x1B, 0x55,
+	0xE4, 0x93, 0xA3, 0xF8, 0xE4, 0x93, 0xA3, 0xF6,
+/*0AB0*/0x08, 0xDF, 0xF9, 0x80, 0x29, 0xE4, 0x93, 0xA3,
+	0xF8, 0x54, 0x07, 0x24, 0x0C, 0xC8, 0xC3, 0x33,
+/*0AC0*/0xC4, 0x54, 0x0F, 0x44, 0x20, 0xC8, 0x83, 0x40,
+	0x04, 0xF4, 0x56, 0x80, 0x01, 0x46, 0xF6, 0xDF,
+/*0AD0*/0xE4, 0x80, 0x0B, 0x01, 0x02, 0x04, 0x08, 0x10,
+	0x20, 0x40, 0x80, 0x90, 0x00, 0x3F, 0xE4, 0x7E,
+/*0AE0*/0x01, 0x93, 0x60, 0xC1, 0xA3, 0xFF, 0x54, 0x3F,
+	0x30, 0xE5, 0x09, 0x54, 0x1F, 0xFE, 0xE4, 0x93,
+/*0AF0*/0xA3, 0x60, 0x01, 0x0E, 0xCF, 0x54, 0xC0, 0x25,
+	0xE0, 0x60, 0xAD, 0x40, 0xB8, 0x80, 0xFE, 0x8C,
+/*0B00*/0x64, 0x8D, 0x65, 0x8A, 0x66, 0x8B, 0x67, 0xE4,
+	0xF5, 0x69, 0xEF, 0x4E, 0x70, 0x03, 0x02, 0x1D,
+/*0B10*/0x55, 0xE4, 0xF5, 0x68, 0xE5, 0x67, 0x45, 0x66,
+	0x70, 0x32, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90,
+/*0B20*/0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE4,
+	0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4, 0x12,
+/*0B30*/0x08, 0x70, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0x92, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*0B40*/0xC6, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8,
+	0xE4, 0xF0, 0x80, 0x11, 0x90, 0x07, 0x26, 0x12,
+/*0B50*/0x07, 0x35, 0xE4, 0x12, 0x08, 0x70, 0x70, 0x05,
+	0x12, 0x07, 0x32, 0xE4, 0xF0, 0x12, 0x1D, 0x55,
+/*0B60*/0x12, 0x1E, 0xBF, 0xE5, 0x67, 0x45, 0x66, 0x70,
+	0x33, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90, 0xE5,
+/*0B70*/0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
+	0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x12,
+/*0B80*/0x08, 0x6E, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0x92, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
+/*0B90*/0x83, 0xC6, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
+	0x83, 0xC8, 0x80, 0x0E, 0x90, 0x07, 0x26, 0x12,
+/*0BA0*/0x07, 0x35, 0x12, 0x08, 0x6E, 0x70, 0x06, 0x12,
+	0x07, 0x32, 0xE5, 0x40, 0xF0, 0xAF, 0x69, 0x7E,
+/*0BB0*/0x00, 0xAD, 0x67, 0xAC, 0x66, 0x12, 0x04, 0x44,
+	0x12, 0x07, 0x2A, 0x75, 0x83, 0xCA, 0xE0, 0xD3,
+/*0BC0*/0x94, 0x00, 0x50, 0x0C, 0x05, 0x68, 0xE5, 0x68,
+	0xC3, 0x94, 0x05, 0x50, 0x03, 0x02, 0x0B, 0x14,
+/*0BD0*/0x22, 0x8C, 0x60, 0x8D, 0x61, 0x12, 0x08, 0xDA,
+	0x74, 0x20, 0x40, 0x0D, 0x2F, 0xF5, 0x82, 0x74,
+/*0BE0*/0x03, 0x3E, 0xF5, 0x83, 0xE5, 0x3E, 0xF0, 0x80,
+	0x0B, 0x2F, 0xF5, 0x82, 0x74, 0x03, 0x3E, 0xF5,
+/*0BF0*/0x83, 0xE5, 0x3C, 0xF0, 0xE5, 0x3C, 0xD3, 0x95,
+	0x3E, 0x40, 0x3C, 0xE5, 0x61, 0x45, 0x60, 0x70,
+/*0C00*/0x10, 0xE9, 0x12, 0x09, 0x04, 0xE5, 0x3E, 0x12,
+	0x07, 0x68, 0x40, 0x3B, 0x12, 0x08, 0x95, 0x80,
+/*0C10*/0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40, 0x1D,
+	0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05, 0x85,
+/*0C20*/0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39, 0x8F,
+	0x3A, 0x12, 0x08, 0x14, 0xE5, 0x3E, 0x12, 0x07,
+/*0C30*/0xC0, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x43, 0xE5,
+	0x61, 0x45, 0x60, 0x70, 0x19, 0x12, 0x07, 0x5F,
+/*0C40*/0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x27, 0x12,
+	0x09, 0x0B, 0x12, 0x08, 0x14, 0xE5, 0x42, 0x12,
+/*0C50*/0x07, 0xC0, 0xE5, 0x41, 0xF0, 0x22, 0xE5, 0x3C,
+	0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C, 0x38,
+/*0C60*/0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39, 0x80,
+	0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x08,
+/*0C70*/0x14, 0xE5, 0x3C, 0x12, 0x07, 0xC0, 0xE5, 0x3D,
+	0xF0, 0x22, 0x85, 0x38, 0x38, 0x85, 0x39, 0x39,
+/*0C80*/0x85, 0x3A, 0x3A, 0x12, 0x08, 0x14, 0xE5, 0x38,
+	0x12, 0x07, 0xC0, 0xE5, 0x39, 0xF0, 0x22, 0x7F,
+/*0C90*/0x06, 0x12, 0x17, 0x31, 0x12, 0x1D, 0x23, 0x12,
+	0x0E, 0x04, 0x12, 0x0E, 0x33, 0xE0, 0x44, 0x0A,
+/*0CA0*/0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E, 0x04, 0x12,
+	0x0E, 0x0B, 0xEF, 0xF0, 0xE5, 0x28, 0x30, 0xE5,
+/*0CB0*/0x03, 0xD3, 0x80, 0x01, 0xC3, 0x40, 0x05, 0x75,
+	0x14, 0x20, 0x80, 0x03, 0x75, 0x14, 0x08, 0x12,
+/*0CC0*/0x0E, 0x04, 0x75, 0x83, 0x8A, 0xE5, 0x14, 0xF0,
+	0xB4, 0xFF, 0x05, 0x75, 0x12, 0x80, 0x80, 0x06,
+/*0CD0*/0xE5, 0x14, 0xC3, 0x13, 0xF5, 0x12, 0xE4, 0xF5,
+	0x16, 0xF5, 0x7F, 0x12, 0x19, 0x36, 0x12, 0x13,
+/*0CE0*/0xA3, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x50, 0x09,
+	0x05, 0x16, 0xE5, 0x16, 0xC3, 0x94, 0x14, 0x40,
+/*0CF0*/0xEA, 0xE5, 0xE4, 0x20, 0xE7, 0x28, 0x12, 0x0E,
+	0x04, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
+/*0D00*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+	0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
+/*0D10*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
+	0x5E, 0x60, 0x03, 0x12, 0x1D, 0xD7, 0xE5, 0x7F,
+/*0D20*/0xC3, 0x94, 0x11, 0x40, 0x14, 0x12, 0x0E, 0x04,
+	0x75, 0x83, 0xD2, 0xE0, 0x44, 0x80, 0xF0, 0xE5,
+/*0D30*/0xE4, 0x20, 0xE7, 0x0F, 0x12, 0x1D, 0xD7, 0x80,
+	0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0xD2, 0xE0,
+/*0D40*/0x54, 0x7F, 0xF0, 0x12, 0x1D, 0x23, 0x22, 0x74,
+	0x8A, 0x85, 0x08, 0x82, 0xF5, 0x83, 0xE5, 0x17,
+/*0D50*/0xF0, 0x12, 0x0E, 0x3A, 0xE4, 0xF0, 0x90, 0x07,
+	0x02, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x90,
+/*0D60*/0xEF, 0xF0, 0x74, 0x92, 0xFE, 0xE5, 0x08, 0x44,
+	0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0, 0x54,
+/*0D70*/0xC0, 0xFD, 0x90, 0x07, 0x03, 0xE0, 0x54, 0x3F,
+	0x4D, 0x8F, 0x82, 0x8E, 0x83, 0xF0, 0x90, 0x07,
+/*0D80*/0x04, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x82,
+	0xEF, 0xF0, 0x90, 0x07, 0x05, 0xE0, 0xFF, 0xED,
+/*0D90*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xB4, 0xEF,
+	0x12, 0x0E, 0x03, 0x75, 0x83, 0x80, 0xE0, 0x54,
+/*0DA0*/0xBF, 0xF0, 0x30, 0x37, 0x0A, 0x12, 0x0E, 0x91,
+	0x75, 0x83, 0x94, 0xE0, 0x44, 0x80, 0xF0, 0x30,
+/*0DB0*/0x38, 0x0A, 0x12, 0x0E, 0x91, 0x75, 0x83, 0x92,
+	0xE0, 0x44, 0x80, 0xF0, 0xE5, 0x28, 0x30, 0xE4,
+/*0DC0*/0x1A, 0x20, 0x39, 0x0A, 0x12, 0x0E, 0x04, 0x75,
+	0x83, 0x88, 0xE0, 0x54, 0x7F, 0xF0, 0x20, 0x3A,
+/*0DD0*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x88, 0xE0,
+	0x54, 0xBF, 0xF0, 0x74, 0x8C, 0xFE, 0x12, 0x0E,
+/*0DE0*/0x04, 0x8E, 0x83, 0xE0, 0x54, 0x0F, 0x12, 0x0E,
+	0x03, 0x75, 0x83, 0x86, 0xE0, 0x54, 0xBF, 0xF0,
+/*0DF0*/0xE5, 0x08, 0x44, 0x06, 0x12, 0x0D, 0xFD, 0x75,
+	0x83, 0x8A, 0xE4, 0xF0, 0x22, 0xF5, 0x82, 0x75,
+/*0E00*/0x83, 0x82, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07,
+	0xF5, 0x82, 0x22, 0x8E, 0x83, 0xE0, 0xF5, 0x10,
+/*0E10*/0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44, 0x01, 0xFF,
+	0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07, 0xF5, 0x82,
+/*0E20*/0x22, 0xE5, 0x15, 0xC4, 0x54, 0x07, 0xFF, 0xE5,
+	0x08, 0xFD, 0xED, 0x44, 0x08, 0xF5, 0x82, 0x75,
+/*0E30*/0x83, 0x82, 0x22, 0x75, 0x83, 0x80, 0xE0, 0x44,
+	0x40, 0xF0, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
+/*0E40*/0x75, 0x83, 0x8A, 0x22, 0xE5, 0x16, 0x25, 0xE0,
+	0x25, 0xE0, 0x24, 0xAF, 0xF5, 0x82, 0xE4, 0x34,
+/*0E50*/0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0D, 0x22,
+	0x43, 0xE1, 0x10, 0x43, 0xE1, 0x80, 0x53, 0xE1,
+/*0E60*/0xFD, 0x85, 0xE1, 0x10, 0x22, 0xE5, 0x16, 0x25,
+	0xE0, 0x25, 0xE0, 0x24, 0xB2, 0xF5, 0x82, 0xE4,
+/*0E70*/0x34, 0x1A, 0xF5, 0x83, 0xE4, 0x93, 0x22, 0x85,
+	0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15, 0xF0,
+/*0E80*/0x22, 0xE5, 0xE2, 0x54, 0x20, 0xD3, 0x94, 0x00,
+	0x22, 0xE5, 0xE2, 0x54, 0x40, 0xD3, 0x94, 0x00,
+/*0E90*/0x22, 0xE5, 0x08, 0x44, 0x06, 0xF5, 0x82, 0x22,
+	0xFD, 0xE5, 0x08, 0xFB, 0xEB, 0x44, 0x07, 0xF5,
+/*0EA0*/0x82, 0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFE, 0x30,
+	0x22, 0xEF, 0x4E, 0x70, 0x26, 0x12, 0x07, 0xCC,
+/*0EB0*/0xE0, 0xFD, 0x90, 0x07, 0x26, 0x12, 0x07, 0x7B,
+	0x12, 0x07, 0xD8, 0xE0, 0xFD, 0x90, 0x07, 0x28,
+/*0EC0*/0x12, 0x07, 0x7B, 0x12, 0x08, 0x81, 0x12, 0x07,
+	0x72, 0x12, 0x08, 0x35, 0xE0, 0x90, 0x07, 0x24,
+/*0ED0*/0x12, 0x07, 0x78, 0xEF, 0x64, 0x04, 0x4E, 0x70,
+	0x29, 0x12, 0x07, 0xE4, 0xE0, 0xFD, 0x90, 0x07,
+/*0EE0*/0x26, 0x12, 0x07, 0x7B, 0x12, 0x07, 0xF0, 0xE0,
+	0xFD, 0x90, 0x07, 0x28, 0x12, 0x07, 0x7B, 0x12,
+/*0EF0*/0x08, 0x8B, 0x12, 0x07, 0x72, 0x12, 0x08, 0x41,
+	0xE0, 0x54, 0x1F, 0xFD, 0x90, 0x07, 0x24, 0x12,
+/*0F00*/0x07, 0x7B, 0xEF, 0x64, 0x01, 0x4E, 0x70, 0x04,
+	0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0xEF, 0x64,
+/*0F10*/0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02,
+	0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x35, 0x12, 0x07,
+/*0F20*/0xFC, 0xE0, 0xFF, 0x90, 0x07, 0x26, 0x12, 0x07,
+	0x89, 0xEF, 0xF0, 0x12, 0x08, 0x08, 0xE0, 0xFF,
+/*0F30*/0x90, 0x07, 0x28, 0x12, 0x07, 0x89, 0xEF, 0xF0,
+	0x12, 0x08, 0x4D, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
+/*0F40*/0x07, 0x86, 0xEF, 0xF0, 0x12, 0x08, 0x59, 0xE0,
+	0x54, 0x1F, 0xFF, 0x90, 0x07, 0x24, 0x12, 0x07,
+/*0F50*/0x89, 0xEF, 0xF0, 0x22, 0xE4, 0xF5, 0x53, 0x12,
+	0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+/*0F60*/0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40, 0x04, 0x7E,
+	0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x70,
+/*0F70*/0x03, 0x02, 0x0F, 0xF6, 0x85, 0xE1, 0x10, 0x43,
+	0xE1, 0x02, 0x53, 0xE1, 0x0F, 0x85, 0xE1, 0x10,
+/*0F80*/0xE4, 0xF5, 0x51, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
+	0x52, 0x12, 0x0E, 0x89, 0x40, 0x1D, 0xAD, 0x52,
+/*0F90*/0xAF, 0x51, 0x12, 0x11, 0x18, 0xEF, 0x60, 0x08,
+	0x85, 0xE1, 0x10, 0x43, 0xE1, 0x40, 0x80, 0x0B,
+/*0FA0*/0x53, 0xE1, 0xBF, 0x12, 0x0E, 0x58, 0x12, 0x00,
+	0x06, 0x80, 0xFB, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
+/*0FB0*/0x51, 0xE5, 0xE4, 0x54, 0x3F, 0xF5, 0x52, 0x12,
+	0x0E, 0x81, 0x40, 0x1D, 0xAD, 0x52, 0xAF, 0x51,
+/*0FC0*/0x12, 0x11, 0x18, 0xEF, 0x60, 0x08, 0x85, 0xE1,
+	0x10, 0x43, 0xE1, 0x20, 0x80, 0x0B, 0x53, 0xE1,
+/*0FD0*/0xDF, 0x12, 0x0E, 0x58, 0x12, 0x00, 0x06, 0x80,
+	0xFB, 0x12, 0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01,
+/*0FE0*/0x80, 0x02, 0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40,
+	0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
+/*0FF0*/0x4F, 0x60, 0x03, 0x12, 0x0E, 0x5B, 0x22, 0x12,
+	0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x22,
+/*1000*/0x02, 0x11, 0x00, 0x02, 0x10, 0x40, 0x02, 0x10,
+	0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1010*/0x01, 0x20, 0x01, 0x20, 0xE4, 0xF5, 0x57, 0x12,
+	0x16, 0xBD, 0x12, 0x16, 0x44, 0xE4, 0x12, 0x10,
+/*1020*/0x56, 0x12, 0x14, 0xB7, 0x90, 0x07, 0x26, 0x12,
+	0x07, 0x35, 0xE4, 0x12, 0x07, 0x31, 0xE4, 0xF0,
+/*1030*/0x12, 0x10, 0x56, 0x12, 0x14, 0xB7, 0x90, 0x07,
+	0x26, 0x12, 0x07, 0x35, 0xE5, 0x41, 0x12, 0x07,
+/*1040*/0x31, 0xE5, 0x40, 0xF0, 0xAF, 0x57, 0x7E, 0x00,
+	0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
+/*1050*/0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xFF, 0x90,
+	0x07, 0x20, 0xA3, 0xE0, 0xFD, 0xE4, 0xF5, 0x56,
+/*1060*/0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x12,
+	0x11, 0x51, 0x7F, 0x0F, 0x7D, 0x18, 0xE4, 0xF5,
+/*1070*/0x56, 0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA,
+	0x12, 0x15, 0x41, 0xAF, 0x56, 0x7E, 0x00, 0x12,
+/*1080*/0x1A, 0xFF, 0xE4, 0xFF, 0xF5, 0x56, 0x7D, 0x1F,
+	0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x22,
+/*1090*/0x22, 0xE4, 0xF5, 0x55, 0xE5, 0x08, 0xFD, 0x74,
+	0xA0, 0xF5, 0x56, 0xED, 0x44, 0x07, 0xF5, 0x57,
+/*10A0*/0xE5, 0x28, 0x30, 0xE5, 0x03, 0xD3, 0x80, 0x01,
+	0xC3, 0x40, 0x05, 0x7F, 0x28, 0xEF, 0x80, 0x04,
+/*10B0*/0x7F, 0x14, 0xEF, 0xC3, 0x13, 0xF5, 0x54, 0xE4,
+	0xF9, 0x12, 0x0E, 0x18, 0x75, 0x83, 0x8E, 0xE0,
+/*10C0*/0xF5, 0x10, 0xCE, 0xEF, 0xCE, 0xEE, 0xD3, 0x94,
+	0x00, 0x40, 0x26, 0xE5, 0x10, 0x54, 0xFE, 0x12,
+/*10D0*/0x0E, 0x98, 0x75, 0x83, 0x8E, 0xED, 0xF0, 0xE5,
+	0x10, 0x44, 0x01, 0xFD, 0xEB, 0x44, 0x07, 0xF5,
+/*10E0*/0x82, 0xED, 0xF0, 0x85, 0x57, 0x82, 0x85, 0x56,
+	0x83, 0xE0, 0x30, 0xE3, 0x01, 0x09, 0x1E, 0x80,
+/*10F0*/0xD4, 0xC2, 0x34, 0xE9, 0xC3, 0x95, 0x54, 0x40,
+	0x02, 0xD2, 0x34, 0x22, 0x02, 0x00, 0x06, 0x22,
+/*1100*/0x30, 0x30, 0x11, 0x90, 0x10, 0x00, 0xE4, 0x93,
+	0xF5, 0x10, 0x90, 0x10, 0x10, 0xE4, 0x93, 0xF5,
+/*1110*/0x10, 0x12, 0x10, 0x90, 0x12, 0x11, 0x50, 0x22,
+	0xE4, 0xFC, 0xC3, 0xED, 0x9F, 0xFA, 0xEF, 0xF5,
+/*1120*/0x83, 0x75, 0x82, 0x00, 0x79, 0xFF, 0xE4, 0x93,
+	0xCC, 0x6C, 0xCC, 0xA3, 0xD9, 0xF8, 0xDA, 0xF6,
+/*1130*/0xE5, 0xE2, 0x30, 0xE4, 0x02, 0x8C, 0xE5, 0xED,
+	0x24, 0xFF, 0xFF, 0xEF, 0x75, 0x82, 0xFF, 0xF5,
+/*1140*/0x83, 0xE4, 0x93, 0x6C, 0x70, 0x03, 0x7F, 0x01,
+	0x22, 0x7F, 0x00, 0x22, 0x22, 0x11, 0x00, 0x00,
+/*1150*/0x22, 0x8E, 0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D,
+	0x5B, 0x8A, 0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01,
+/*1160*/0xE4, 0xF5, 0x5F, 0xF5, 0x60, 0xF5, 0x62, 0x12,
+	0x07, 0x2A, 0x75, 0x83, 0xD0, 0xE0, 0xFF, 0xC4,
+/*1170*/0x54, 0x0F, 0xF5, 0x61, 0x12, 0x1E, 0xA5, 0x85,
+	0x59, 0x5E, 0xD3, 0xE5, 0x5E, 0x95, 0x5B, 0xE5,
+/*1180*/0x5A, 0x12, 0x07, 0x6B, 0x50, 0x4B, 0x12, 0x07,
+	0x03, 0x75, 0x83, 0xBC, 0xE0, 0x45, 0x5E, 0x12,
+/*1190*/0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x45, 0x5E,
+	0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0, 0x45,
+/*11A0*/0x5E, 0xF0, 0xAF, 0x5F, 0xE5, 0x60, 0x12, 0x08,
+	0x78, 0x12, 0x0A, 0xFF, 0xAF, 0x62, 0x7E, 0x00,
+/*11B0*/0xAD, 0x5D, 0xAC, 0x5C, 0x12, 0x04, 0x44, 0xE5,
+	0x61, 0xAF, 0x5E, 0x7E, 0x00, 0xB4, 0x03, 0x05,
+/*11C0*/0x12, 0x1E, 0x21, 0x80, 0x07, 0xAD, 0x5D, 0xAC,
+	0x5C, 0x12, 0x13, 0x17, 0x05, 0x5E, 0x02, 0x11,
+/*11D0*/0x7A, 0x12, 0x07, 0x03, 0x75, 0x83, 0xBC, 0xE0,
+	0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBE,
+/*11E0*/0xE0, 0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83,
+	0xC0, 0xE0, 0x45, 0x40, 0xF0, 0x22, 0x8E, 0x58,
+/*11F0*/0x8F, 0x59, 0x75, 0x5A, 0x01, 0x79, 0x01, 0x75,
+	0x5B, 0x01, 0xE4, 0xFB, 0x12, 0x07, 0x2A, 0x75,
+/*1200*/0x83, 0xAE, 0xE0, 0x54, 0x1A, 0xFF, 0x12, 0x08,
+	0x65, 0xE0, 0xC4, 0x13, 0x54, 0x07, 0xFE, 0xEF,
+/*1210*/0x70, 0x0C, 0xEE, 0x65, 0x35, 0x70, 0x07, 0x90,
+	0x07, 0x2F, 0xE0, 0xB4, 0x01, 0x0D, 0xAF, 0x35,
+/*1220*/0x7E, 0x00, 0x12, 0x0E, 0xA9, 0xCF, 0xEB, 0xCF,
+	0x02, 0x1E, 0x60, 0xE5, 0x59, 0x64, 0x02, 0x45,
+/*1230*/0x58, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
+	0x00, 0xE5, 0x59, 0x45, 0x58, 0x70, 0x04, 0x7E,
+/*1240*/0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60,
+	0x23, 0x85, 0x41, 0x49, 0x85, 0x40, 0x4B, 0xE5,
+/*1250*/0x59, 0x45, 0x58, 0x70, 0x2C, 0xAF, 0x5A, 0xFE,
+	0xCD, 0xE9, 0xCD, 0xFC, 0xAB, 0x59, 0xAA, 0x58,
+/*1260*/0x12, 0x0A, 0xFF, 0xAF, 0x5B, 0x7E, 0x00, 0x12,
+	0x1E, 0x60, 0x80, 0x15, 0xAF, 0x5B, 0x7E, 0x00,
+/*1270*/0x12, 0x1E, 0x60, 0x90, 0x07, 0x26, 0x12, 0x07,
+	0x35, 0xE5, 0x49, 0x12, 0x07, 0x31, 0xE5, 0x4B,
+/*1280*/0xF0, 0xE4, 0xFD, 0xAF, 0x35, 0xFE, 0xFC, 0x12,
+	0x09, 0x15, 0x22, 0x8C, 0x64, 0x8D, 0x65, 0x12,
+/*1290*/0x08, 0xDA, 0x40, 0x3C, 0xE5, 0x65, 0x45, 0x64,
+	0x70, 0x10, 0x12, 0x09, 0x04, 0xC3, 0xE5, 0x3E,
+/*12A0*/0x12, 0x07, 0x69, 0x40, 0x3B, 0x12, 0x08, 0x95,
+	0x80, 0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40,
+/*12B0*/0x1D, 0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05,
+	0x85, 0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39,
+/*12C0*/0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3E, 0x12,
+	0x07, 0x53, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x3B,
+/*12D0*/0xE5, 0x65, 0x45, 0x64, 0x70, 0x11, 0x12, 0x07,
+	0x5F, 0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x1F,
+/*12E0*/0x12, 0x07, 0x3E, 0xE5, 0x41, 0xF0, 0x22, 0xE5,
+	0x3C, 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C,
+/*12F0*/0x38, 0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39,
+	0x80, 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12,
+/*1300*/0x07, 0xA8, 0xE5, 0x3C, 0x12, 0x07, 0x53, 0xE5,
+	0x3D, 0xF0, 0x22, 0x12, 0x07, 0x9F, 0xE5, 0x38,
+/*1310*/0x12, 0x07, 0x53, 0xE5, 0x39, 0xF0, 0x22, 0x8C,
+	0x63, 0x8D, 0x64, 0x12, 0x08, 0xDA, 0x40, 0x3C,
+/*1320*/0xE5, 0x64, 0x45, 0x63, 0x70, 0x10, 0x12, 0x09,
+	0x04, 0xC3, 0xE5, 0x3E, 0x12, 0x07, 0x69, 0x40,
+/*1330*/0x3B, 0x12, 0x08, 0x95, 0x80, 0x18, 0xE5, 0x3E,
+	0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3E, 0x38,
+/*1340*/0xE5, 0x3E, 0x60, 0x05, 0x85, 0x3F, 0x39, 0x80,
+	0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x07,
+/*1350*/0xA8, 0xE5, 0x3E, 0x12, 0x07, 0x53, 0xE5, 0x3F,
+	0xF0, 0x22, 0x80, 0x3B, 0xE5, 0x64, 0x45, 0x63,
+/*1360*/0x70, 0x11, 0x12, 0x07, 0x5F, 0x40, 0x05, 0x12,
+	0x08, 0x9E, 0x80, 0x1F, 0x12, 0x07, 0x3E, 0xE5,
+/*1370*/0x41, 0xF0, 0x22, 0xE5, 0x3C, 0xC3, 0x95, 0x38,
+	0x40, 0x1D, 0x85, 0x3C, 0x38, 0xE5, 0x3C, 0x60,
+/*1380*/0x05, 0x85, 0x3D, 0x39, 0x80, 0x03, 0x85, 0x39,
+	0x39, 0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3C,
+/*1390*/0x12, 0x07, 0x53, 0xE5, 0x3D, 0xF0, 0x22, 0x12,
+	0x07, 0x9F, 0xE5, 0x38, 0x12, 0x07, 0x53, 0xE5,
+/*13A0*/0x39, 0xF0, 0x22, 0xE5, 0x0D, 0xFE, 0xE5, 0x08,
+	0x8E, 0x54, 0x44, 0x05, 0xF5, 0x55, 0x75, 0x15,
+/*13B0*/0x0F, 0xF5, 0x82, 0x12, 0x0E, 0x7A, 0x12, 0x17,
+	0xA3, 0x20, 0x31, 0x05, 0x75, 0x15, 0x03, 0x80,
+/*13C0*/0x03, 0x75, 0x15, 0x0B, 0xE5, 0x0A, 0xC3, 0x94,
+	0x01, 0x50, 0x38, 0x12, 0x14, 0x20, 0x20, 0x31,
+/*13D0*/0x06, 0x05, 0x15, 0x05, 0x15, 0x80, 0x04, 0x15,
+	0x15, 0x15, 0x15, 0xE5, 0x0A, 0xC3, 0x94, 0x01,
+/*13E0*/0x50, 0x21, 0x12, 0x14, 0x20, 0x20, 0x31, 0x04,
+	0x05, 0x15, 0x80, 0x02, 0x15, 0x15, 0xE5, 0x0A,
+/*13F0*/0xC3, 0x94, 0x01, 0x50, 0x0E, 0x12, 0x0E, 0x77,
+	0x12, 0x17, 0xA3, 0x20, 0x31, 0x05, 0x05, 0x15,
+/*1400*/0x12, 0x0E, 0x77, 0xE5, 0x15, 0xB4, 0x08, 0x04,
+	0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x15,
+/*1410*/0xB4, 0x07, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E,
+	0x00, 0xEE, 0x4F, 0x60, 0x02, 0x05, 0x7F, 0x22,
+/*1420*/0x85, 0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15,
+	0xF0, 0x12, 0x17, 0xA3, 0x22, 0x12, 0x07, 0x2A,
+/*1430*/0x75, 0x83, 0xAE, 0x74, 0xFF, 0x12, 0x07, 0x29,
+	0xE0, 0x54, 0x1A, 0xF5, 0x34, 0xE0, 0xC4, 0x13,
+/*1440*/0x54, 0x07, 0xF5, 0x35, 0x24, 0xFE, 0x60, 0x24,
+	0x24, 0xFE, 0x60, 0x3C, 0x24, 0x04, 0x70, 0x63,
+/*1450*/0x75, 0x31, 0x2D, 0xE5, 0x08, 0xFD, 0x74, 0xB6,
+	0x12, 0x07, 0x92, 0x74, 0xBC, 0x90, 0x07, 0x22,
+/*1460*/0x12, 0x07, 0x95, 0x74, 0x90, 0x12, 0x07, 0xB3,
+	0x74, 0x92, 0x80, 0x3C, 0x75, 0x31, 0x3A, 0xE5,
+/*1470*/0x08, 0xFD, 0x74, 0xBA, 0x12, 0x07, 0x92, 0x74,
+	0xC0, 0x90, 0x07, 0x22, 0x12, 0x07, 0xB6, 0x74,
+/*1480*/0xC4, 0x12, 0x07, 0xB3, 0x74, 0xC8, 0x80, 0x20,
+	0x75, 0x31, 0x35, 0xE5, 0x08, 0xFD, 0x74, 0xB8,
+/*1490*/0x12, 0x07, 0x92, 0x74, 0xBE, 0xFF, 0xED, 0x44,
+	0x07, 0x90, 0x07, 0x22, 0xCF, 0xF0, 0xA3, 0xEF,
+/*14A0*/0xF0, 0x74, 0xC2, 0x12, 0x07, 0xB3, 0x74, 0xC6,
+	0xFF, 0xED, 0x44, 0x07, 0xA3, 0xCF, 0xF0, 0xA3,
+/*14B0*/0xEF, 0xF0, 0x22, 0x75, 0x34, 0x01, 0x22, 0x8E,
+	0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D, 0x5B, 0x8A,
+/*14C0*/0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01, 0xE4, 0xF5,
+	0x5F, 0x12, 0x1E, 0xA5, 0x85, 0x59, 0x5E, 0xD3,
+/*14D0*/0xE5, 0x5E, 0x95, 0x5B, 0xE5, 0x5A, 0x12, 0x07,
+	0x6B, 0x50, 0x57, 0xE5, 0x5D, 0x45, 0x5C, 0x70,
+/*14E0*/0x30, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x92, 0xE5,
+	0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC6, 0xE5,
+/*14F0*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8, 0xE5,
+	0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0x90, 0xE5,
+/*1500*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
+	0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x80,
+/*1510*/0x03, 0x12, 0x07, 0x32, 0xE5, 0x5E, 0xF0, 0xAF,
+	0x5F, 0x7E, 0x00, 0xAD, 0x5D, 0xAC, 0x5C, 0x12,
+/*1520*/0x04, 0x44, 0xAF, 0x5E, 0x7E, 0x00, 0xAD, 0x5D,
+	0xAC, 0x5C, 0x12, 0x0B, 0xD1, 0x05, 0x5E, 0x02,
+/*1530*/0x14, 0xCF, 0xAB, 0x5D, 0xAA, 0x5C, 0xAD, 0x5B,
+	0xAC, 0x5A, 0xAF, 0x59, 0xAE, 0x58, 0x02, 0x1B,
+/*1540*/0xFB, 0x8C, 0x5C, 0x8D, 0x5D, 0x8A, 0x5E, 0x8B,
+	0x5F, 0x75, 0x60, 0x01, 0xE4, 0xF5, 0x61, 0xF5,
+/*1550*/0x62, 0xF5, 0x63, 0x12, 0x1E, 0xA5, 0x8F, 0x60,
+	0xD3, 0xE5, 0x60, 0x95, 0x5D, 0xE5, 0x5C, 0x12,
+/*1560*/0x07, 0x6B, 0x50, 0x61, 0xE5, 0x5F, 0x45, 0x5E,
+	0x70, 0x27, 0x12, 0x07, 0x2A, 0x75, 0x83, 0xB6,
+/*1570*/0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xB8,
+	0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBA,
+/*1580*/0xE5, 0x60, 0xF0, 0xAF, 0x61, 0x7E, 0x00, 0xE5,
+	0x62, 0x12, 0x08, 0x7A, 0x12, 0x0A, 0xFF, 0x80,
+/*1590*/0x19, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE5,
+	0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0x8E, 0xE4,
+/*15A0*/0x12, 0x07, 0x29, 0x74, 0x01, 0x12, 0x07, 0x29,
+	0xE4, 0xF0, 0xAF, 0x63, 0x7E, 0x00, 0xAD, 0x5F,
+/*15B0*/0xAC, 0x5E, 0x12, 0x04, 0x44, 0xAF, 0x60, 0x7E,
+	0x00, 0xAD, 0x5F, 0xAC, 0x5E, 0x12, 0x12, 0x8B,
+/*15C0*/0x05, 0x60, 0x02, 0x15, 0x58, 0x22, 0x90, 0x11,
+	0x4D, 0xE4, 0x93, 0x90, 0x07, 0x2E, 0xF0, 0x12,
+/*15D0*/0x08, 0x1F, 0x75, 0x83, 0xAE, 0xE0, 0x54, 0x1A,
+	0xF5, 0x34, 0x70, 0x67, 0xEF, 0x44, 0x07, 0xF5,
+/*15E0*/0x82, 0x75, 0x83, 0xCE, 0xE0, 0xFF, 0x13, 0x13,
+	0x13, 0x54, 0x07, 0xF5, 0x36, 0x54, 0x0F, 0xD3,
+/*15F0*/0x94, 0x00, 0x40, 0x06, 0x12, 0x14, 0x2D, 0x12,
+	0x1B, 0xA9, 0xE5, 0x36, 0x54, 0x0F, 0x24, 0xFE,
+/*1600*/0x60, 0x0C, 0x14, 0x60, 0x0C, 0x14, 0x60, 0x19,
+	0x24, 0x03, 0x70, 0x37, 0x80, 0x10, 0x02, 0x1E,
+/*1610*/0x91, 0x12, 0x1E, 0x91, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0xCE, 0xE0, 0x54, 0xEF, 0xF0, 0x02, 0x1D,
+/*1620*/0xAE, 0x12, 0x10, 0x14, 0xE4, 0xF5, 0x55, 0x12,
+	0x1D, 0x85, 0x05, 0x55, 0xE5, 0x55, 0xC3, 0x94,
+/*1630*/0x05, 0x40, 0xF4, 0x12, 0x07, 0x2A, 0x75, 0x83,
+	0xCE, 0xE0, 0x54, 0xC7, 0x12, 0x07, 0x29, 0xE0,
+/*1640*/0x44, 0x08, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
+	0x59, 0xAF, 0x08, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+/*1650*/0x75, 0x83, 0xD0, 0xE0, 0xFD, 0xC4, 0x54, 0x0F,
+	0xF5, 0x5A, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0x75,
+/*1660*/0x83, 0x80, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x21,
+	0x75, 0x83, 0x82, 0xE5, 0x45, 0xF0, 0xEF, 0x44,
+/*1670*/0x07, 0xF5, 0x82, 0x75, 0x83, 0x8A, 0x74, 0xFF,
+	0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x07, 0x2A, 0x75,
+/*1680*/0x83, 0xBC, 0xE0, 0x54, 0xEF, 0x12, 0x07, 0x29,
+	0x75, 0x83, 0xBE, 0xE0, 0x54, 0xEF, 0x12, 0x07,
+/*1690*/0x29, 0x75, 0x83, 0xC0, 0xE0, 0x54, 0xEF, 0x12,
+	0x07, 0x29, 0x75, 0x83, 0xBC, 0xE0, 0x44, 0x10,
+/*16A0*/0x12, 0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x44,
+	0x10, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0,
+/*16B0*/0x44, 0x10, 0xF0, 0xAF, 0x58, 0xE5, 0x59, 0x12,
+	0x08, 0x78, 0x02, 0x0A, 0xFF, 0xE4, 0xF5, 0x58,
+/*16C0*/0x7D, 0x01, 0xF5, 0x59, 0xAF, 0x35, 0xFE, 0xFC,
+	0x12, 0x09, 0x15, 0x12, 0x07, 0x2A, 0x75, 0x83,
+/*16D0*/0xB6, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+	0xB8, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*16E0*/0xBA, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+	0xBC, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*16F0*/0xBE, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+	0xC0, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*1700*/0x90, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2,
+	0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4,
+/*1710*/0x12, 0x07, 0x29, 0x75, 0x83, 0x92, 0xE4, 0x12,
+	0x07, 0x29, 0x75, 0x83, 0xC6, 0xE4, 0x12, 0x07,
+/*1720*/0x29, 0x75, 0x83, 0xC8, 0xE4, 0xF0, 0xAF, 0x58,
+	0xFE, 0xE5, 0x59, 0x12, 0x08, 0x7A, 0x02, 0x0A,
+/*1730*/0xFF, 0xE5, 0xE2, 0x30, 0xE4, 0x6C, 0xE5, 0xE7,
+	0x54, 0xC0, 0x64, 0x40, 0x70, 0x64, 0xE5, 0x09,
+/*1740*/0xC4, 0x54, 0x30, 0xFE, 0xE5, 0x08, 0x25, 0xE0,
+	0x25, 0xE0, 0x54, 0xC0, 0x4E, 0xFE, 0xEF, 0x54,
+/*1750*/0x3F, 0x4E, 0xFD, 0xE5, 0x2B, 0xAE, 0x2A, 0x78,
+	0x02, 0xC3, 0x33, 0xCE, 0x33, 0xCE, 0xD8, 0xF9,
+/*1760*/0xF5, 0x82, 0x8E, 0x83, 0xED, 0xF0, 0xE5, 0x2B,
+	0xAE, 0x2A, 0x78, 0x02, 0xC3, 0x33, 0xCE, 0x33,
+/*1770*/0xCE, 0xD8, 0xF9, 0xFF, 0xF5, 0x82, 0x8E, 0x83,
+	0xA3, 0xE5, 0xFE, 0xF0, 0x8F, 0x82, 0x8E, 0x83,
+/*1780*/0xA3, 0xA3, 0xE5, 0xFD, 0xF0, 0x8F, 0x82, 0x8E,
+	0x83, 0xA3, 0xA3, 0xA3, 0xE5, 0xFC, 0xF0, 0xC3,
+/*1790*/0xE5, 0x2B, 0x94, 0xFA, 0xE5, 0x2A, 0x94, 0x00,
+	0x50, 0x08, 0x05, 0x2B, 0xE5, 0x2B, 0x70, 0x02,
+/*17A0*/0x05, 0x2A, 0x22, 0xE4, 0xFF, 0xE4, 0xF5, 0x58,
+	0xF5, 0x56, 0xF5, 0x57, 0x74, 0x82, 0xFC, 0x12,
+/*17B0*/0x0E, 0x04, 0x8C, 0x83, 0xE0, 0xF5, 0x10, 0x54,
+	0x7F, 0xF0, 0xE5, 0x10, 0x44, 0x80, 0x12, 0x0E,
+/*17C0*/0x98, 0xED, 0xF0, 0x7E, 0x0A, 0x12, 0x0E, 0x04,
+	0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0, 0x26, 0xDE,
+/*17D0*/0xF4, 0x05, 0x57, 0xE5, 0x57, 0x70, 0x02, 0x05,
+	0x56, 0xE5, 0x14, 0x24, 0x01, 0xFD, 0xE4, 0x33,
+/*17E0*/0xFC, 0xD3, 0xE5, 0x57, 0x9D, 0xE5, 0x56, 0x9C,
+	0x40, 0xD9, 0xE5, 0x0A, 0x94, 0x20, 0x50, 0x02,
+/*17F0*/0x05, 0x0A, 0x43, 0xE1, 0x08, 0xC2, 0x31, 0x12,
+	0x0E, 0x04, 0x75, 0x83, 0xA6, 0xE0, 0x55, 0x12,
+/*1800*/0x65, 0x12, 0x70, 0x03, 0xD2, 0x31, 0x22, 0xC2,
+	0x31, 0x22, 0x90, 0x07, 0x26, 0xE0, 0xFA, 0xA3,
+/*1810*/0xE0, 0xF5, 0x82, 0x8A, 0x83, 0xE0, 0xF5, 0x41,
+	0xE5, 0x39, 0xC3, 0x95, 0x41, 0x40, 0x26, 0xE5,
+/*1820*/0x39, 0x95, 0x41, 0xC3, 0x9F, 0xEE, 0x12, 0x07,
+	0x6B, 0x40, 0x04, 0x7C, 0x01, 0x80, 0x02, 0x7C,
+/*1830*/0x00, 0xE5, 0x41, 0x64, 0x3F, 0x60, 0x04, 0x7B,
+	0x01, 0x80, 0x02, 0x7B, 0x00, 0xEC, 0x5B, 0x60,
+/*1840*/0x29, 0x05, 0x41, 0x80, 0x28, 0xC3, 0xE5, 0x41,
+	0x95, 0x39, 0xC3, 0x9F, 0xEE, 0x12, 0x07, 0x6B,
+/*1850*/0x40, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00,
+	0xE5, 0x41, 0x60, 0x04, 0x7E, 0x01, 0x80, 0x02,
+/*1860*/0x7E, 0x00, 0xEF, 0x5E, 0x60, 0x04, 0x15, 0x41,
+	0x80, 0x03, 0x85, 0x39, 0x41, 0x85, 0x3A, 0x40,
+/*1870*/0x22, 0xE5, 0xE2, 0x30, 0xE4, 0x60, 0xE5, 0xE1,
+	0x30, 0xE2, 0x5B, 0xE5, 0x09, 0x70, 0x04, 0x7F,
+/*1880*/0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x08, 0x70,
+	0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
+/*1890*/0x5F, 0x60, 0x43, 0x53, 0xF9, 0xF8, 0xE5, 0xE2,
+	0x30, 0xE4, 0x3B, 0xE5, 0xE1, 0x30, 0xE2, 0x2E,
+/*18A0*/0x43, 0xFA, 0x02, 0x53, 0xFA, 0xFB, 0xE4, 0xF5,
+	0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0xE5,
+/*18B0*/0xE1, 0x30, 0xE2, 0xE7, 0x90, 0x94, 0x70, 0xE0,
+	0x65, 0x10, 0x60, 0x03, 0x43, 0xFA, 0x04, 0x05,
+/*18C0*/0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0x70,
+	0xE6, 0x12, 0x00, 0x06, 0x80, 0xE1, 0x53, 0xFA,
+/*18D0*/0xFD, 0x53, 0xFA, 0xFB, 0x80, 0xC0, 0x22, 0x8F,
+	0x54, 0x12, 0x00, 0x06, 0xE5, 0xE1, 0x30, 0xE0,
+/*18E0*/0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5,
+	0x7E, 0xD3, 0x94, 0x05, 0x40, 0x04, 0x7E, 0x01,
+/*18F0*/0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60, 0x3D,
+	0x85, 0x54, 0x11, 0xE5, 0xE2, 0x20, 0xE1, 0x32,
+/*1900*/0x74, 0xCE, 0x12, 0x1A, 0x05, 0x30, 0xE7, 0x04,
+	0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0x8F, 0x82,
+/*1910*/0x8E, 0x83, 0xE0, 0x30, 0xE6, 0x04, 0x7F, 0x01,
+	0x80, 0x02, 0x7F, 0x00, 0xEF, 0x5D, 0x70, 0x15,
+/*1920*/0x12, 0x15, 0xC6, 0x74, 0xCE, 0x12, 0x1A, 0x05,
+	0x30, 0xE6, 0x07, 0xE0, 0x44, 0x80, 0xF0, 0x43,
+/*1930*/0xF9, 0x80, 0x12, 0x18, 0x71, 0x22, 0x12, 0x0E,
+	0x44, 0xE5, 0x16, 0x25, 0xE0, 0x25, 0xE0, 0x24,
+/*1940*/0xB0, 0xF5, 0x82, 0xE4, 0x34, 0x1A, 0xF5, 0x83,
+	0xE4, 0x93, 0xF5, 0x0F, 0xE5, 0x16, 0x25, 0xE0,
+/*1950*/0x25, 0xE0, 0x24, 0xB1, 0xF5, 0x82, 0xE4, 0x34,
+	0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0E, 0x12,
+/*1960*/0x0E, 0x65, 0xF5, 0x10, 0xE5, 0x0F, 0x54, 0xF0,
+	0x12, 0x0E, 0x17, 0x75, 0x83, 0x8C, 0xEF, 0xF0,
+/*1970*/0xE5, 0x0F, 0x30, 0xE0, 0x0C, 0x12, 0x0E, 0x04,
+	0x75, 0x83, 0x86, 0xE0, 0x44, 0x40, 0xF0, 0x80,
+/*1980*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x86, 0xE0,
+	0x54, 0xBF, 0xF0, 0x12, 0x0E, 0x91, 0x75, 0x83,
+/*1990*/0x82, 0xE5, 0x0E, 0xF0, 0x22, 0x7F, 0x05, 0x12,
+	0x17, 0x31, 0x12, 0x0E, 0x04, 0x12, 0x0E, 0x33,
+/*19A0*/0x74, 0x02, 0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E,
+	0x04, 0x12, 0x0E, 0x0B, 0xEF, 0xF0, 0x75, 0x15,
+/*19B0*/0x70, 0x12, 0x0F, 0xF7, 0x20, 0x34, 0x05, 0x75,
+	0x15, 0x10, 0x80, 0x03, 0x75, 0x15, 0x50, 0x12,
+/*19C0*/0x0F, 0xF7, 0x20, 0x34, 0x04, 0x74, 0x10, 0x80,
+	0x02, 0x74, 0xF0, 0x25, 0x15, 0xF5, 0x15, 0x12,
+/*19D0*/0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x20,
+	0x34, 0x17, 0xE5, 0x15, 0x64, 0x30, 0x60, 0x0C,
+/*19E0*/0x74, 0x10, 0x25, 0x15, 0xF5, 0x15, 0xB4, 0x80,
+	0x03, 0xE4, 0xF5, 0x15, 0x12, 0x0E, 0x21, 0xEF,
+/*19F0*/0xF0, 0x22, 0xF0, 0xE5, 0x0B, 0x25, 0xE0, 0x25,
+	0xE0, 0x24, 0x82, 0xF5, 0x82, 0xE4, 0x34, 0x07,
+/*1A00*/0xF5, 0x83, 0x22, 0x74, 0x88, 0xFE, 0xE5, 0x08,
+	0x44, 0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0,
+/*1A10*/0x22, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
+	0x22, 0xF0, 0xE0, 0x54, 0xC0, 0x8F, 0x82, 0x8E,
+/*1A20*/0x83, 0xF0, 0x22, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+	0x75, 0x83, 0x86, 0xE0, 0x54, 0x10, 0xD3, 0x94,
+/*1A30*/0x00, 0x22, 0xF0, 0x90, 0x07, 0x15, 0xE0, 0x04,
+	0xF0, 0x22, 0x44, 0x06, 0xF5, 0x82, 0x75, 0x83,
+/*1A40*/0x9E, 0xE0, 0x22, 0xFE, 0xEF, 0x44, 0x07, 0xF5,
+	0x82, 0x8E, 0x83, 0xE0, 0x22, 0xE4, 0x90, 0x07,
+/*1A50*/0x2A, 0xF0, 0xA3, 0xF0, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0x82, 0xE0, 0x54, 0x7F, 0x12, 0x07, 0x29,
+/*1A60*/0xE0, 0x44, 0x80, 0xF0, 0x12, 0x10, 0xFC, 0x12,
+	0x08, 0x1F, 0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0,
+/*1A70*/0x1A, 0x90, 0x07, 0x2B, 0xE0, 0x04, 0xF0, 0x70,
+	0x06, 0x90, 0x07, 0x2A, 0xE0, 0x04, 0xF0, 0x90,
+/*1A80*/0x07, 0x2A, 0xE0, 0xB4, 0x10, 0xE1, 0xA3, 0xE0,
+	0xB4, 0x00, 0xDC, 0xEE, 0x44, 0xA6, 0xFC, 0xEF,
+/*1A90*/0x44, 0x07, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0xF5,
+	0x32, 0xEE, 0x44, 0xA8, 0xFE, 0xEF, 0x44, 0x07,
+/*1AA0*/0xF5, 0x82, 0x8E, 0x83, 0xE0, 0xF5, 0x33, 0x22,
+	0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x90,
+/*1AB0*/0x00, 0x20, 0x0F, 0x92, 0x00, 0x21, 0x0F, 0x94,
+	0x00, 0x22, 0x0F, 0x96, 0x00, 0x23, 0x0F, 0x98,
+/*1AC0*/0x00, 0x24, 0x0F, 0x9A, 0x00, 0x25, 0x0F, 0x9C,
+	0x00, 0x26, 0x0F, 0x9E, 0x00, 0x27, 0x0F, 0xA0,
+/*1AD0*/0x01, 0x20, 0x01, 0xA2, 0x01, 0x21, 0x01, 0xA4,
+	0x01, 0x22, 0x01, 0xA6, 0x01, 0x23, 0x01, 0xA8,
+/*1AE0*/0x01, 0x24, 0x01, 0xAA, 0x01, 0x25, 0x01, 0xAC,
+	0x01, 0x26, 0x01, 0xAE, 0x01, 0x27, 0x01, 0xB0,
+/*1AF0*/0x01, 0x28, 0x01, 0xB4, 0x00, 0x28, 0x0F, 0xB6,
+	0x40, 0x28, 0x0F, 0xB8, 0x61, 0x28, 0x01, 0xCB,
+/*1B00*/0xEF, 0xCB, 0xCA, 0xEE, 0xCA, 0x7F, 0x01, 0xE4,
+	0xFD, 0xEB, 0x4A, 0x70, 0x24, 0xE5, 0x08, 0xF5,
+/*1B10*/0x82, 0x74, 0xB6, 0x12, 0x08, 0x29, 0xE5, 0x08,
+	0xF5, 0x82, 0x74, 0xB8, 0x12, 0x08, 0x29, 0xE5,
+/*1B20*/0x08, 0xF5, 0x82, 0x74, 0xBA, 0x12, 0x08, 0x29,
+	0x7E, 0x00, 0x7C, 0x00, 0x12, 0x0A, 0xFF, 0x80,
+/*1B30*/0x12, 0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE5,
+	0x41, 0xF0, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35,
+/*1B40*/0xE5, 0x40, 0xF0, 0x12, 0x07, 0x2A, 0x75, 0x83,
+	0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74, 0x01, 0x12,
+/*1B50*/0x07, 0x29, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x26,
+	0xF5, 0x27, 0x53, 0xE1, 0xFE, 0xF5, 0x2A, 0x75,
+/*1B60*/0x2B, 0x01, 0xF5, 0x08, 0x7F, 0x01, 0x12, 0x17,
+	0x31, 0x30, 0x30, 0x1C, 0x90, 0x1A, 0xA9, 0xE4,
+/*1B70*/0x93, 0xF5, 0x10, 0x90, 0x1F, 0xF9, 0xE4, 0x93,
+	0xF5, 0x10, 0x90, 0x00, 0x41, 0xE4, 0x93, 0xF5,
+/*1B80*/0x10, 0x90, 0x1E, 0xCA, 0xE4, 0x93, 0xF5, 0x10,
+	0x7F, 0x02, 0x12, 0x17, 0x31, 0x12, 0x0F, 0x54,
+/*1B90*/0x7F, 0x03, 0x12, 0x17, 0x31, 0x12, 0x00, 0x06,
+	0xE5, 0xE2, 0x30, 0xE7, 0x09, 0x12, 0x10, 0x00,
+/*1BA0*/0x30, 0x30, 0x03, 0x12, 0x11, 0x00, 0x02, 0x00,
+	0x47, 0x12, 0x08, 0x1F, 0x75, 0x83, 0xD0, 0xE0,
+/*1BB0*/0xC4, 0x54, 0x0F, 0xFD, 0x75, 0x43, 0x01, 0x75,
+	0x44, 0xFF, 0x12, 0x08, 0xAA, 0x74, 0x04, 0xF0,
+/*1BC0*/0x75, 0x3B, 0x01, 0xED, 0x14, 0x60, 0x0C, 0x14,
+	0x60, 0x0B, 0x14, 0x60, 0x0F, 0x24, 0x03, 0x70,
+/*1BD0*/0x0B, 0x80, 0x09, 0x80, 0x00, 0x12, 0x08, 0xA7,
+	0x04, 0xF0, 0x80, 0x06, 0x12, 0x08, 0xA7, 0x74,
+/*1BE0*/0x04, 0xF0, 0xEE, 0x44, 0x82, 0xFE, 0xEF, 0x44,
+	0x07, 0xF5, 0x82, 0x8E, 0x83, 0xE5, 0x45, 0x12,
+/*1BF0*/0x08, 0xBE, 0x75, 0x83, 0x82, 0xE5, 0x31, 0xF0,
+	0x02, 0x11, 0x4C, 0x8E, 0x60, 0x8F, 0x61, 0x12,
+/*1C00*/0x1E, 0xA5, 0xE4, 0xFF, 0xCE, 0xED, 0xCE, 0xEE,
+	0xD3, 0x95, 0x61, 0xE5, 0x60, 0x12, 0x07, 0x6B,
+/*1C10*/0x40, 0x39, 0x74, 0x20, 0x2E, 0xF5, 0x82, 0xE4,
+	0x34, 0x03, 0xF5, 0x83, 0xE0, 0x70, 0x03, 0xFF,
+/*1C20*/0x80, 0x26, 0x12, 0x08, 0xE2, 0xFD, 0xC3, 0x9F,
+	0x40, 0x1E, 0xCF, 0xED, 0xCF, 0xEB, 0x4A, 0x70,
+/*1C30*/0x0B, 0x8D, 0x42, 0x12, 0x08, 0xEE, 0xF5, 0x41,
+	0x8E, 0x40, 0x80, 0x0C, 0x12, 0x08, 0xE2, 0xF5,
+/*1C40*/0x38, 0x12, 0x08, 0xEE, 0xF5, 0x39, 0x8E, 0x3A,
+	0x1E, 0x80, 0xBC, 0x22, 0x75, 0x58, 0x01, 0xE5,
+/*1C50*/0x35, 0x70, 0x0C, 0x12, 0x07, 0xCC, 0xE0, 0xF5,
+	0x4A, 0x12, 0x07, 0xD8, 0xE0, 0xF5, 0x4C, 0xE5,
+/*1C60*/0x35, 0xB4, 0x04, 0x0C, 0x12, 0x07, 0xE4, 0xE0,
+	0xF5, 0x4A, 0x12, 0x07, 0xF0, 0xE0, 0xF5, 0x4C,
+/*1C70*/0xE5, 0x35, 0xB4, 0x01, 0x04, 0x7F, 0x01, 0x80,
+	0x02, 0x7F, 0x00, 0xE5, 0x35, 0xB4, 0x02, 0x04,
+/*1C80*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F,
+	0x60, 0x0C, 0x12, 0x07, 0xFC, 0xE0, 0xF5, 0x4A,
+/*1C90*/0x12, 0x08, 0x08, 0xE0, 0xF5, 0x4C, 0x85, 0x41,
+	0x49, 0x85, 0x40, 0x4B, 0x22, 0x75, 0x5B, 0x01,
+/*1CA0*/0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE0, 0x54,
+	0x1F, 0xFF, 0xD3, 0x94, 0x02, 0x50, 0x04, 0x8F,
+/*1CB0*/0x58, 0x80, 0x05, 0xEF, 0x24, 0xFE, 0xF5, 0x58,
+	0xEF, 0xC3, 0x94, 0x18, 0x40, 0x05, 0x75, 0x59,
+/*1CC0*/0x18, 0x80, 0x04, 0xEF, 0x04, 0xF5, 0x59, 0x85,
+	0x43, 0x5A, 0xAF, 0x58, 0x7E, 0x00, 0xAD, 0x59,
+/*1CD0*/0x7C, 0x00, 0xAB, 0x5B, 0x7A, 0x00, 0x12, 0x15,
+	0x41, 0xAF, 0x5A, 0x7E, 0x00, 0x12, 0x18, 0x0A,
+/*1CE0*/0xAF, 0x5B, 0x7E, 0x00, 0x02, 0x1A, 0xFF, 0xE5,
+	0xE2, 0x30, 0xE7, 0x0E, 0x12, 0x10, 0x03, 0xC2,
+/*1CF0*/0x30, 0x30, 0x30, 0x03, 0x12, 0x10, 0xFF, 0x20,
+	0x33, 0x28, 0xE5, 0xE7, 0x30, 0xE7, 0x05, 0x12,
+/*1D00*/0x0E, 0xA2, 0x80, 0x0D, 0xE5, 0xFE, 0xC3, 0x94,
+	0x20, 0x50, 0x06, 0x12, 0x0E, 0xA2, 0x43, 0xF9,
+/*1D10*/0x08, 0xE5, 0xF2, 0x30, 0xE7, 0x03, 0x53, 0xF9,
+	0x7F, 0xE5, 0xF1, 0x54, 0x70, 0xD3, 0x94, 0x00,
+/*1D20*/0x50, 0xD8, 0x22, 0x12, 0x0E, 0x04, 0x75, 0x83,
+	0x80, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0x12,
+/*1D30*/0x0D, 0xFD, 0x75, 0x83, 0x84, 0x12, 0x0E, 0x02,
+	0x75, 0x83, 0x86, 0x12, 0x0E, 0x02, 0x75, 0x83,
+/*1D40*/0x8C, 0xE0, 0x54, 0xF3, 0x12, 0x0E, 0x03, 0x75,
+	0x83, 0x8E, 0x12, 0x0E, 0x02, 0x75, 0x83, 0x94,
+/*1D50*/0xE0, 0x54, 0xFB, 0xF0, 0x22, 0x12, 0x07, 0x2A,
+	0x75, 0x83, 0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74,
+/*1D60*/0x01, 0x12, 0x07, 0x29, 0xE4, 0x12, 0x08, 0xBE,
+	0x75, 0x83, 0x8C, 0xE0, 0x44, 0x20, 0x12, 0x08,
+/*1D70*/0xBE, 0xE0, 0x54, 0xDF, 0xF0, 0x74, 0x84, 0x85,
+	0x08, 0x82, 0xF5, 0x83, 0xE0, 0x54, 0x7F, 0xF0,
+/*1D80*/0xE0, 0x44, 0x80, 0xF0, 0x22, 0x75, 0x56, 0x01,
+	0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE, 0xFC,
+/*1D90*/0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12, 0x1E,
+	0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E, 0x00,
+/*1DA0*/0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
+	0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0x75, 0x56,
+/*1DB0*/0x01, 0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE,
+	0xFC, 0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12,
+/*1DC0*/0x1E, 0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E,
+	0x00, 0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44,
+/*1DD0*/0xAF, 0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xE4,
+	0xF5, 0x16, 0x12, 0x0E, 0x44, 0xFE, 0xE5, 0x08,
+/*1DE0*/0x44, 0x05, 0xFF, 0x12, 0x0E, 0x65, 0x8F, 0x82,
+	0x8E, 0x83, 0xF0, 0x05, 0x16, 0xE5, 0x16, 0xC3,
+/*1DF0*/0x94, 0x14, 0x40, 0xE6, 0xE5, 0x08, 0x12, 0x0E,
+	0x2B, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
+/*1E00*/0x59, 0xF5, 0x5A, 0xFF, 0xFE, 0xAD, 0x58, 0xFC,
+	0x12, 0x09, 0x15, 0x7F, 0x04, 0x7E, 0x00, 0xAD,
+/*1E10*/0x58, 0x7C, 0x00, 0x12, 0x09, 0x15, 0x7F, 0x02,
+	0x7E, 0x00, 0xAD, 0x58, 0x7C, 0x00, 0x02, 0x09,
+/*1E20*/0x15, 0xE5, 0x3C, 0x25, 0x3E, 0xFC, 0xE5, 0x42,
+	0x24, 0x00, 0xFB, 0xE4, 0x33, 0xFA, 0xEC, 0xC3,
+/*1E30*/0x9B, 0xEA, 0x12, 0x07, 0x6B, 0x40, 0x0B, 0x8C,
+	0x42, 0xE5, 0x3D, 0x25, 0x3F, 0xF5, 0x41, 0x8F,
+/*1E40*/0x40, 0x22, 0x12, 0x09, 0x0B, 0x22, 0x74, 0x84,
+	0xF5, 0x18, 0x85, 0x08, 0x19, 0x85, 0x19, 0x82,
+/*1E50*/0x85, 0x18, 0x83, 0xE0, 0x54, 0x7F, 0xF0, 0xE0,
+	0x44, 0x80, 0xF0, 0xE0, 0x44, 0x80, 0xF0, 0x22,
+/*1E60*/0xEF, 0x4E, 0x70, 0x0B, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0xD2, 0xE0, 0x54, 0xDF, 0xF0, 0x22, 0x12,
+/*1E70*/0x07, 0x2A, 0x75, 0x83, 0xD2, 0xE0, 0x44, 0x20,
+	0xF0, 0x22, 0x75, 0x58, 0x01, 0x90, 0x07, 0x26,
+/*1E80*/0x12, 0x07, 0x35, 0xE0, 0x54, 0x3F, 0xF5, 0x41,
+	0x12, 0x07, 0x32, 0xE0, 0x54, 0x3F, 0xF5, 0x40,
+/*1E90*/0x22, 0x75, 0x56, 0x02, 0xE4, 0xF5, 0x57, 0x12,
+	0x1D, 0xFC, 0xAF, 0x57, 0x7E, 0x00, 0xAD, 0x56,
+/*1EA0*/0x7C, 0x00, 0x02, 0x04, 0x44, 0xE4, 0xF5, 0x42,
+	0xF5, 0x41, 0xF5, 0x40, 0xF5, 0x38, 0xF5, 0x39,
+/*1EB0*/0xF5, 0x3A, 0x22, 0xEF, 0x54, 0x07, 0xFF, 0xE5,
+	0xF9, 0x54, 0xF8, 0x4F, 0xF5, 0xF9, 0x22, 0x7F,
+/*1EC0*/0x01, 0xE4, 0xFE, 0x0F, 0x0E, 0xBE, 0xFF, 0xFB,
+	0x22, 0x01, 0x20, 0x00, 0x01, 0x04, 0x20, 0x00,
+/*1ED0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1EE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1EF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F00*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F10*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F20*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F30*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F40*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F50*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F60*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F70*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F80*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F90*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FA0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FB0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FC0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FD0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x81
+};
+
+int ipath_sd7220_ib_load(struct ipath_devdata *dd)
+{
+	return ipath_sd7220_prog_ld(dd, IB_7220_SERDES, ipath_sd7220_ib_img,
+		sizeof(ipath_sd7220_ib_img), 0);
+}
+
+int ipath_sd7220_ib_vfy(struct ipath_devdata *dd)
+{
+	return ipath_sd7220_prog_vfy(dd, IB_7220_SERDES, ipath_sd7220_ib_img,
+		sizeof(ipath_sd7220_ib_img), 0);
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
new file mode 100644
index 0000000..1974df7
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -0,0 +1,790 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+
+#include "ipath_kernel.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
+
+#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
+
+static void vl15_watchdog_enq(struct ipath_devdata *dd)
+{
+	/* ipath_sdma_lock must already be held */
+	if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
+		unsigned long interval = (HZ + 19) / 20;
+		dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
+		add_timer(&dd->ipath_sdma_vl15_timer);
+	}
+}
+
+static void vl15_watchdog_deq(struct ipath_devdata *dd)
+{
+	/* ipath_sdma_lock must already be held */
+	if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
+		unsigned long interval = (HZ + 19) / 20;
+		mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
+	} else {
+		del_timer(&dd->ipath_sdma_vl15_timer);
+	}
+}
+
+static void vl15_watchdog_timeout(unsigned long opaque)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+	if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
+		ipath_dbg("vl15 watchdog timeout - clearing\n");
+		ipath_cancel_sends(dd, 1);
+		ipath_hol_down(dd);
+	} else {
+		ipath_dbg("vl15 watchdog timeout - "
+			  "condition already cleared\n");
+	}
+}
+
+static void unmap_desc(struct ipath_devdata *dd, unsigned head)
+{
+	__le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
+	u64 desc[2];
+	dma_addr_t addr;
+	size_t len;
+
+	desc[0] = le64_to_cpu(descqp[0]);
+	desc[1] = le64_to_cpu(descqp[1]);
+
+	addr = (desc[1] << 32) | (desc[0] >> 32);
+	len = (desc[0] >> 14) & (0x7ffULL << 2);
+	dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
+}
+
+/*
+ * ipath_sdma_lock should be locked before calling this.
+ */
+int ipath_sdma_make_progress(struct ipath_devdata *dd)
+{
+	struct list_head *lp = NULL;
+	struct ipath_sdma_txreq *txp = NULL;
+	u16 dmahead;
+	u16 start_idx = 0;
+	int progress = 0;
+
+	if (!list_empty(&dd->ipath_sdma_activelist)) {
+		lp = dd->ipath_sdma_activelist.next;
+		txp = list_entry(lp, struct ipath_sdma_txreq, list);
+		start_idx = txp->start_idx;
+	}
+
+	/*
+	 * Read the SDMA head register in order to know that the
+	 * interrupt clear has been written to the chip.
+	 * Otherwise, we may not get an interrupt for the last
+	 * descriptor in the queue.
+	 */
+	dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
+	/* sanity check return value for error handling (chip reset, etc.) */
+	if (dmahead >= dd->ipath_sdma_descq_cnt)
+		goto done;
+
+	while (dd->ipath_sdma_descq_head != dmahead) {
+		if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
+		    dd->ipath_sdma_descq_head == start_idx) {
+			unmap_desc(dd, dd->ipath_sdma_descq_head);
+			start_idx++;
+			if (start_idx == dd->ipath_sdma_descq_cnt)
+				start_idx = 0;
+		}
+
+		/* increment free count and head */
+		dd->ipath_sdma_descq_removed++;
+		if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
+			dd->ipath_sdma_descq_head = 0;
+
+		if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
+			/* move to notify list */
+			if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+				vl15_watchdog_deq(dd);
+			list_move_tail(lp, &dd->ipath_sdma_notifylist);
+			if (!list_empty(&dd->ipath_sdma_activelist)) {
+				lp = dd->ipath_sdma_activelist.next;
+				txp = list_entry(lp, struct ipath_sdma_txreq,
+						 list);
+				start_idx = txp->start_idx;
+			} else {
+				lp = NULL;
+				txp = NULL;
+			}
+		}
+		progress = 1;
+	}
+
+	if (progress)
+		tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
+
+done:
+	return progress;
+}
+
+static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
+{
+	struct ipath_sdma_txreq *txp, *txp_next;
+
+	list_for_each_entry_safe(txp, txp_next, list, list) {
+		list_del_init(&txp->list);
+
+		if (txp->callback)
+			(*txp->callback)(txp->callback_cookie,
+					 txp->callback_status);
+	}
+}
+
+static void sdma_notify_taskbody(struct ipath_devdata *dd)
+{
+	unsigned long flags;
+	struct list_head list;
+
+	INIT_LIST_HEAD(&list);
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+	list_splice_init(&dd->ipath_sdma_notifylist, &list);
+
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+	ipath_sdma_notify(dd, &list);
+
+	/*
+	 * The IB verbs layer needs to see the callback before getting
+	 * the call to ipath_ib_piobufavail() because the callback
+	 * handles releasing resources the next send will need.
+	 * Otherwise, we could do these calls in
+	 * ipath_sdma_make_progress().
+	 */
+	ipath_ib_piobufavail(dd->verbs_dev);
+}
+
+static void sdma_notify_task(unsigned long opaque)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+	if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+		sdma_notify_taskbody(dd);
+}
+
+static void dump_sdma_state(struct ipath_devdata *dd)
+{
+	unsigned long reg;
+
+	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
+	ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
+
+	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
+	ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
+
+	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
+	ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
+
+	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
+	ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
+
+	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
+	ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
+
+	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
+	ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
+
+	reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
+	ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
+}
+
+static void sdma_abort_task(unsigned long opaque)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
+	u64 status;
+	unsigned long flags;
+
+	if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+		return;
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+	status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
+
+	/* nothing to do */
+	if (status == IPATH_SDMA_ABORT_NONE)
+		goto unlock;
+
+	/* ipath_sdma_abort() is done, waiting for interrupt */
+	if (status == IPATH_SDMA_ABORT_DISARMED) {
+		if (jiffies < dd->ipath_sdma_abort_intr_timeout)
+			goto resched_noprint;
+		/* give up, intr got lost somewhere */
+		ipath_dbg("give up waiting for SDMADISABLED intr\n");
+		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+		status = IPATH_SDMA_ABORT_ABORTED;
+	}
+
+	/* everything is stopped, time to clean up and restart */
+	if (status == IPATH_SDMA_ABORT_ABORTED) {
+		struct ipath_sdma_txreq *txp, *txpnext;
+		u64 hwstatus;
+		int notify = 0;
+
+		hwstatus = ipath_read_kreg64(dd,
+				dd->ipath_kregs->kr_senddmastatus);
+
+		if (/* ScoreBoardDrainInProg */
+		    test_bit(63, &hwstatus) ||
+		    /* AbortInProg */
+		    test_bit(62, &hwstatus) ||
+		    /* InternalSDmaEnable */
+		    test_bit(61, &hwstatus) ||
+		    /* ScbEmpty */
+		    !test_bit(30, &hwstatus)) {
+			if (dd->ipath_sdma_reset_wait > 0) {
+				/* not done shutting down sdma */
+				--dd->ipath_sdma_reset_wait;
+				goto resched;
+			}
+			ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
+				"status after SDMA reset, continuing\n");
+			dump_sdma_state(dd);
+		}
+
+		/* dequeue all "sent" requests */
+		list_for_each_entry_safe(txp, txpnext,
+					 &dd->ipath_sdma_activelist, list) {
+			txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
+			if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+				vl15_watchdog_deq(dd);
+			list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
+			notify = 1;
+		}
+		if (notify)
+			tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
+
+		/* reset our notion of head and tail */
+		dd->ipath_sdma_descq_tail = 0;
+		dd->ipath_sdma_descq_head = 0;
+		dd->ipath_sdma_head_dma[0] = 0;
+		dd->ipath_sdma_generation = 0;
+		dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
+
+		/* Reset SendDmaLenGen */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
+			(u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
+
+		/* done with sdma state for a bit */
+		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+		/*
+		 * Don't restart sdma here. Wait until link is up to ACTIVE.
+		 * VL15 MADs used to bring the link up use PIO, and multiple
+		 * link transitions otherwise cause the sdma engine to be
+		 * stopped and started multiple times.
+		 * The disable is done here, including the shadow, so the
+		 * state is kept consistent.
+		 * See ipath_restart_sdma() for the actual starting of sdma.
+		 */
+		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+		dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+				 dd->ipath_sendctrl);
+		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+		/* make sure I see next message */
+		dd->ipath_sdma_abort_jiffies = 0;
+
+		goto done;
+	}
+
+resched:
+	/*
+	 * for now, keep spinning
+	 * JAG - this is bad to just have default be a loop without
+	 * state change
+	 */
+	if (jiffies > dd->ipath_sdma_abort_jiffies) {
+		ipath_dbg("looping with status 0x%016llx\n",
+			  dd->ipath_sdma_status);
+		dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
+	}
+resched_noprint:
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+	if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+		tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+	return;
+
+unlock:
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+done:
+	return;
+}
+
+/*
+ * This is called from interrupt context.
+ */
+void ipath_sdma_intr(struct ipath_devdata *dd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+	(void) ipath_sdma_make_progress(dd);
+
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+}
+
+static int alloc_sdma(struct ipath_devdata *dd)
+{
+	int ret = 0;
+
+	/* Allocate memory for SendDMA descriptor FIFO */
+	dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
+		SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
+
+	if (!dd->ipath_sdma_descq) {
+		ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
+			"FIFO memory\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	dd->ipath_sdma_descq_cnt =
+		SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
+
+	/* Allocate memory for DMA of head register to memory */
+	dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
+		PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
+	if (!dd->ipath_sdma_head_dma) {
+		ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
+		ret = -ENOMEM;
+		goto cleanup_descq;
+	}
+	dd->ipath_sdma_head_dma[0] = 0;
+
+	init_timer(&dd->ipath_sdma_vl15_timer);
+	dd->ipath_sdma_vl15_timer.function = vl15_watchdog_timeout;
+	dd->ipath_sdma_vl15_timer.data = (unsigned long)dd;
+	atomic_set(&dd->ipath_sdma_vl15_count, 0);
+
+	goto done;
+
+cleanup_descq:
+	dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
+		(void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
+	dd->ipath_sdma_descq = NULL;
+	dd->ipath_sdma_descq_phys = 0;
+done:
+	return ret;
+}
+
+int setup_sdma(struct ipath_devdata *dd)
+{
+	int ret = 0;
+	unsigned i, n;
+	u64 tmp64;
+	u64 senddmabufmask[3] = { 0 };
+	unsigned long flags;
+
+	ret = alloc_sdma(dd);
+	if (ret)
+		goto done;
+
+	if (!dd->ipath_sdma_descq) {
+		ipath_dev_err(dd, "SendDMA memory not allocated\n");
+		goto done;
+	}
+
+	dd->ipath_sdma_status = 0;
+	dd->ipath_sdma_abort_jiffies = 0;
+	dd->ipath_sdma_generation = 0;
+	dd->ipath_sdma_descq_tail = 0;
+	dd->ipath_sdma_descq_head = 0;
+	dd->ipath_sdma_descq_removed = 0;
+	dd->ipath_sdma_descq_added = 0;
+
+	/* Set SendDmaBase */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
+			 dd->ipath_sdma_descq_phys);
+	/* Set SendDmaLenGen */
+	tmp64 = dd->ipath_sdma_descq_cnt;
+	tmp64 |= 1<<18; /* enable generation checking */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
+	/* Set SendDmaTail */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
+			 dd->ipath_sdma_descq_tail);
+	/* Set SendDmaHeadAddr */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
+			 dd->ipath_sdma_head_phys);
+
+	/* Reserve all the former "kernel" piobufs */
+	n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved;
+	for (i = dd->ipath_lastport_piobuf; i < n; ++i) {
+		unsigned word = i / 64;
+		unsigned bit = i & 63;
+		BUG_ON(word >= 3);
+		senddmabufmask[word] |= 1ULL << bit;
+	}
+	ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
+		n - dd->ipath_lastport_piobuf, 0);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
+			 senddmabufmask[0]);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
+			 senddmabufmask[1]);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
+			 senddmabufmask[2]);
+
+	INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
+	INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
+
+	tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
+		     (unsigned long) dd);
+	tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
+		     (unsigned long) dd);
+
+	/*
+	 * No use to turn on SDMA here, as link is probably not ACTIVE
+	 * Just mark it RUNNING and enable the interrupt, and let the
+	 * ipath_restart_sdma() on link transition to ACTIVE actually
+	 * enable it.
+	 */
+	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+	dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	__set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
+	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+done:
+	return ret;
+}
+
+void teardown_sdma(struct ipath_devdata *dd)
+{
+	struct ipath_sdma_txreq *txp, *txpnext;
+	unsigned long flags;
+	dma_addr_t sdma_head_phys = 0;
+	dma_addr_t sdma_descq_phys = 0;
+	void *sdma_descq = NULL;
+	void *sdma_head_dma = NULL;
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+	__clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
+	__set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+	__set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+	tasklet_kill(&dd->ipath_sdma_abort_task);
+	tasklet_kill(&dd->ipath_sdma_notify_task);
+
+	/* turn off sdma */
+	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+	dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+		dd->ipath_sendctrl);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+	/* dequeue all "sent" requests */
+	list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
+				 list) {
+		txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
+		if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+			vl15_watchdog_deq(dd);
+		list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
+	}
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+	sdma_notify_taskbody(dd);
+
+	del_timer_sync(&dd->ipath_sdma_vl15_timer);
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+	dd->ipath_sdma_abort_jiffies = 0;
+
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
+
+	if (dd->ipath_sdma_head_dma) {
+		sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
+		sdma_head_phys = dd->ipath_sdma_head_phys;
+		dd->ipath_sdma_head_dma = NULL;
+		dd->ipath_sdma_head_phys = 0;
+	}
+
+	if (dd->ipath_sdma_descq) {
+		sdma_descq = dd->ipath_sdma_descq;
+		sdma_descq_phys = dd->ipath_sdma_descq_phys;
+		dd->ipath_sdma_descq = NULL;
+		dd->ipath_sdma_descq_phys = 0;
+	}
+
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+	if (sdma_head_dma)
+		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+				  sdma_head_dma, sdma_head_phys);
+
+	if (sdma_descq)
+		dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
+				  sdma_descq, sdma_descq_phys);
+}
+
+/*
+ * [Re]start SDMA, if we use it, and it's not already OK.
+ * This is called on transition to link ACTIVE, either the first or
+ * subsequent times.
+ */
+void ipath_restart_sdma(struct ipath_devdata *dd)
+{
+	unsigned long flags;
+	int needed = 1;
+
+	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+		goto bail;
+
+	/*
+	 * First, make sure we should, which is to say,
+	 * check that we are "RUNNING" (not in teardown)
+	 * and not "SHUTDOWN"
+	 */
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+	if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
+		|| test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+			needed = 0;
+	else {
+		__clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+		__clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+		__clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+	}
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+	if (!needed) {
+		ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n",
+			dd->ipath_sdma_status);
+		goto bail;
+	}
+	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+	/*
+	 * First clear, just to be safe. Enable is only done
+	 * in chip on 0->1 transition
+	 */
+	dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+bail:
+	return;
+}
+
+static inline void make_sdma_desc(struct ipath_devdata *dd,
+	u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
+{
+	WARN_ON(addr & 3);
+	/* SDmaPhyAddr[47:32] */
+	sdmadesc[1] = addr >> 32;
+	/* SDmaPhyAddr[31:0] */
+	sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
+	/* SDmaGeneration[1:0] */
+	sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
+	/* SDmaDwordCount[10:0] */
+	sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
+	/* SDmaBufOffset[12:2] */
+	sdmadesc[0] |= dwoffset & 0x7ffULL;
+}
+
+/*
+ * This function queues one IB packet onto the send DMA queue per call.
+ * The caller is responsible for checking:
+ * 1) The number of send DMA descriptor entries is less than the size of
+ *    the descriptor queue.
+ * 2) The IB SGE addresses and lengths are 32-bit aligned
+ *    (except possibly the last SGE's length)
+ * 3) The SGE addresses are suitable for passing to dma_map_single().
+ */
+int ipath_sdma_verbs_send(struct ipath_devdata *dd,
+	struct ipath_sge_state *ss, u32 dwords,
+	struct ipath_verbs_txreq *tx)
+{
+
+	unsigned long flags;
+	struct ipath_sge *sge;
+	int ret = 0;
+	u16 tail;
+	__le64 *descqp;
+	u64 sdmadesc[2];
+	u32 dwoffset;
+	dma_addr_t addr;
+
+	if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
+		ipath_dbg("packet size %X > ibmax %X, fail\n",
+			tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
+		ret = -EMSGSIZE;
+		goto fail;
+	}
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+retry:
+	if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
+		ret = -EBUSY;
+		goto unlock;
+	}
+
+	if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
+		if (ipath_sdma_make_progress(dd))
+			goto retry;
+		ret = -ENOBUFS;
+		goto unlock;
+	}
+
+	addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
+			      tx->map_len, DMA_TO_DEVICE);
+	if (dma_mapping_error(addr)) {
+		ret = -EIO;
+		goto unlock;
+	}
+
+	dwoffset = tx->map_len >> 2;
+	make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
+
+	/* SDmaFirstDesc */
+	sdmadesc[0] |= 1ULL << 12;
+	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
+		sdmadesc[0] |= 1ULL << 14;	/* SDmaUseLargeBuf */
+
+	/* write to the descq */
+	tail = dd->ipath_sdma_descq_tail;
+	descqp = &dd->ipath_sdma_descq[tail].qw[0];
+	*descqp++ = cpu_to_le64(sdmadesc[0]);
+	*descqp++ = cpu_to_le64(sdmadesc[1]);
+
+	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
+		tx->txreq.start_idx = tail;
+
+	/* increment the tail */
+	if (++tail == dd->ipath_sdma_descq_cnt) {
+		tail = 0;
+		descqp = &dd->ipath_sdma_descq[0].qw[0];
+		++dd->ipath_sdma_generation;
+	}
+
+	sge = &ss->sge;
+	while (dwords) {
+		u32 dw;
+		u32 len;
+
+		len = dwords << 2;
+		if (len > sge->length)
+			len = sge->length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		dw = (len + 3) >> 2;
+		addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
+				      DMA_TO_DEVICE);
+		make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
+		/* SDmaUseLargeBuf has to be set in every descriptor */
+		if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
+			sdmadesc[0] |= 1ULL << 14;
+		/* write to the descq */
+		*descqp++ = cpu_to_le64(sdmadesc[0]);
+		*descqp++ = cpu_to_le64(sdmadesc[1]);
+
+		/* increment the tail */
+		if (++tail == dd->ipath_sdma_descq_cnt) {
+			tail = 0;
+			descqp = &dd->ipath_sdma_descq[0].qw[0];
+			++dd->ipath_sdma_generation;
+		}
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (--ss->num_sge)
+				*sge = *ss->sg_list++;
+		} else if (sge->length == 0 && sge->mr != NULL) {
+			if (++sge->n >= IPATH_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+
+		dwoffset += dw;
+		dwords -= dw;
+	}
+
+	if (!tail)
+		descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
+	descqp -= 2;
+	/* SDmaLastDesc */
+	descqp[0] |= __constant_cpu_to_le64(1ULL << 11);
+	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
+		/* SDmaIntReq */
+		descqp[0] |= __constant_cpu_to_le64(1ULL << 15);
+	}
+
+	/* Commit writes to memory and advance the tail on the chip */
+	wmb();
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
+
+	tx->txreq.next_descq_idx = tail;
+	tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
+	dd->ipath_sdma_descq_tail = tail;
+	dd->ipath_sdma_descq_added += tx->txreq.sg_count;
+	list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
+	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
+		vl15_watchdog_enq(dd);
+
+unlock:
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+fail:
+	return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index f772102..e3d80ca 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -245,7 +245,8 @@
 						 sizeof(offset_addr));
 			if (ret)
 				goto bail_free;
-			udata->outbuf = (void __user *) offset_addr;
+			udata->outbuf =
+				(void __user *) (unsigned long) offset_addr;
 			ret = ib_copy_to_udata(udata, &offset,
 					       sizeof(offset));
 			if (ret)
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index d2725cd..c8e3d65 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -136,6 +136,7 @@
 	struct ipath_portdata *pd = dd->ipath_pd[0];
 	size_t blen = 0;
 	char buf[128];
+	u32 hdrqtail;
 
 	*buf = 0;
 	if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
@@ -174,17 +175,18 @@
 	if (blen)
 		ipath_dbg("%s\n", buf);
 
-	if (pd->port_head != (u32)
-	    le64_to_cpu(*dd->ipath_hdrqtailptr)) {
+	hdrqtail = ipath_get_hdrqtail(pd);
+	if (pd->port_head != hdrqtail) {
 		if (dd->ipath_lastport0rcv_cnt ==
 		    ipath_stats.sps_port0pkts) {
 			ipath_cdbg(PKT, "missing rcv interrupts? "
-				   "port0 hd=%llx tl=%x; port0pkts %llx\n",
-				   (unsigned long long)
-				   le64_to_cpu(*dd->ipath_hdrqtailptr),
-				   pd->port_head,
+				   "port0 hd=%x tl=%x; port0pkts %llx; write"
+				   " hd (w/intr)\n",
+				   pd->port_head, hdrqtail,
 				   (unsigned long long)
 				   ipath_stats.sps_port0pkts);
+			ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
+				dd->ipath_rhdrhead_intr_off, pd->port_port);
 		}
 		dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
 	}
@@ -290,11 +292,11 @@
 	    && time_after(jiffies, dd->ipath_unmasktime)) {
 		char ebuf[256];
 		int iserr;
-		iserr = ipath_decode_err(ebuf, sizeof ebuf,
-			dd->ipath_maskederrs);
+		iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
+					 dd->ipath_maskederrs);
 		if (dd->ipath_maskederrs &
-				~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-				INFINIPATH_E_PKTERRS ))
+		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+		      INFINIPATH_E_PKTERRS))
 			ipath_dev_err(dd, "Re-enabling masked errors "
 				      "(%s)\n", ebuf);
 		else {
@@ -306,17 +308,18 @@
 			 * level.
 			 */
 			if (iserr)
-					ipath_dbg("Re-enabling queue full errors (%s)\n",
-							ebuf);
+				ipath_dbg(
+					"Re-enabling queue full errors (%s)\n",
+					ebuf);
 			else
 				ipath_cdbg(ERRPKT, "Re-enabling packet"
-						" problem interrupt (%s)\n", ebuf);
+					" problem interrupt (%s)\n", ebuf);
 		}
 
 		/* re-enable masked errors */
 		dd->ipath_errormask |= dd->ipath_maskederrs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
-			dd->ipath_errormask);
+				 dd->ipath_errormask);
 		dd->ipath_maskederrs = 0;
 	}
 
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 56dfc8a..a6c8efb 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -34,6 +34,7 @@
 #include <linux/ctype.h>
 
 #include "ipath_kernel.h"
+#include "ipath_verbs.h"
 #include "ipath_common.h"
 
 /**
@@ -163,6 +164,15 @@
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
 }
 
+static ssize_t show_localbus_info(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+	/* The string printed here is already newline-terminated. */
+	return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
+}
+
 static ssize_t show_lmc(struct device *dev,
 			struct device_attribute *attr,
 			char *buf)
@@ -311,6 +321,8 @@
 
 	dd->ipath_guid = new_guid;
 	dd->ipath_nguid = 1;
+	if (dd->verbs_dev)
+		dd->verbs_dev->ibdev.node_guid = new_guid;
 
 	ret = strlen(buf);
 	goto bail;
@@ -919,21 +931,21 @@
 	u16 val;
 
 	ret = ipath_parse_ushort(buf, &val);
-	if (ret < 0 || val > 1)
-		goto invalid;
-
-	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
-	if (r < 0) {
-		ret = r;
+	if (ret >= 0 && val > 1) {
+		ipath_dev_err(dd,
+			"attempt to set invalid Rx Polarity (enable)\n");
+		ret = -EINVAL;
 		goto bail;
 	}
 
-	goto bail;
-invalid:
-	ipath_dev_err(dd, "attempt to set invalid Rx Polarity (enable)\n");
+	r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
+	if (r < 0)
+		ret = r;
+
 bail:
 	return ret;
 }
+
 /*
  * Get/Set RX lane-reversal enable. 0=no, 1=yes.
  */
@@ -988,6 +1000,75 @@
 	.attrs = driver_attributes
 };
 
+static ssize_t store_tempsense(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf,
+			       size_t count)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+	int ret, stat;
+	u16 val;
+
+	ret = ipath_parse_ushort(buf, &val);
+	if (ret <= 0) {
+		ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
+		goto bail;
+	}
+	/* If anything but the highest limit, enable T_CRIT_A "interrupt" */
+	stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
+	if (stat) {
+		ipath_dev_err(dd, "Unable to set tempsense config\n");
+		ret = -1;
+		goto bail;
+	}
+	stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
+	if (stat) {
+		ipath_dev_err(dd, "Unable to set local Tcrit\n");
+		ret = -1;
+		goto bail;
+	}
+	stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
+	if (stat) {
+		ipath_dev_err(dd, "Unable to set remote Tcrit\n");
+		ret = -1;
+		goto bail;
+	}
+
+bail:
+	return ret;
+}
+
+/*
+ * dump tempsense regs. in decimal, to ease shell-scripts.
+ */
+static ssize_t show_tempsense(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+	int ret;
+	int idx;
+	u8 regvals[8];
+
+	ret = -ENXIO;
+	for (idx = 0; idx < 8; ++idx) {
+		if (idx == 6)
+			continue;
+		ret = ipath_tempsense_read(dd, idx);
+		if (ret < 0)
+			break;
+		regvals[idx] = ret;
+	}
+	if (idx == 8)
+		ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
+			*(signed char *)(regvals),
+			*(signed char *)(regvals + 1),
+			regvals[2], regvals[3],
+			*(signed char *)(regvals + 5),
+			*(signed char *)(regvals + 7));
+	return ret;
+}
+
 struct attribute_group *ipath_driver_attr_groups[] = {
 	&driver_attr_group,
 	NULL,
@@ -1011,10 +1092,13 @@
 static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
 static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
 static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
+static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
 static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
 		   show_jint_max_packets, store_jint_max_packets);
 static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
 		   show_jint_idle_ticks, store_jint_idle_ticks);
+static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
+		   show_tempsense, store_tempsense);
 
 static struct attribute *dev_attributes[] = {
 	&dev_attr_guid.attr,
@@ -1034,6 +1118,8 @@
 	&dev_attr_rx_pol_inv.attr,
 	&dev_attr_led_override.attr,
 	&dev_attr_logged_errors.attr,
+	&dev_attr_tempsense.attr,
+	&dev_attr_localbus_info.attr,
 	NULL
 };
 
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 2dd8de2..bfe8926 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -94,7 +94,7 @@
 				qp->s_state =
 					OP(SEND_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the BTH */
-				ohdr->u.imm_data = wqe->wr.imm_data;
+				ohdr->u.imm_data = wqe->wr.ex.imm_data;
 				hwords += 1;
 			}
 			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -123,7 +123,7 @@
 				qp->s_state =
 					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the RETH */
-				ohdr->u.rc.imm_data = wqe->wr.imm_data;
+				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
 				hwords += 1;
 				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 					bth0 |= 1 << 23;
@@ -152,7 +152,7 @@
 		else {
 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.imm_data;
+			ohdr->u.imm_data = wqe->wr.ex.imm_data;
 			hwords += 1;
 		}
 		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
@@ -177,7 +177,7 @@
 			qp->s_state =
 				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
-			ohdr->u.imm_data = wqe->wr.imm_data;
+			ohdr->u.imm_data = wqe->wr.ex.imm_data;
 			hwords += 1;
 			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 				bth0 |= 1 << 23;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index de67eed..8b6a261 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -95,7 +95,7 @@
 
 	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.imm_data = swqe->wr.imm_data;
+		wc.imm_data = swqe->wr.ex.imm_data;
 	} else {
 		wc.wc_flags = 0;
 		wc.imm_data = 0;
@@ -303,6 +303,7 @@
 	qp->s_hdrwords = 7;
 	qp->s_cur_size = wqe->length;
 	qp->s_cur_sge = &qp->s_sge;
+	qp->s_dmult = ah_attr->static_rate;
 	qp->s_wqe = wqe;
 	qp->s_sge.sge = wqe->sg_list[0];
 	qp->s_sge.sg_list = wqe->sg_list + 1;
@@ -326,7 +327,7 @@
 	}
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		qp->s_hdrwords++;
-		ohdr->u.ud.imm_data = wqe->wr.imm_data;
+		ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
 		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
 	} else
 		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
new file mode 100644
index 0000000..86e0169
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -0,0 +1,879 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_user_sdma.h"
+
+/* minimum size of header */
+#define IPATH_USER_SDMA_MIN_HEADER_LENGTH	64
+/* expected size of headers (for dma_pool) */
+#define IPATH_USER_SDMA_EXP_HEADER_LENGTH	64
+/* length mask in PBC (lower 11 bits) */
+#define IPATH_PBC_LENGTH_MASK			((1 << 11) - 1)
+
+struct ipath_user_sdma_pkt {
+	u8 naddr;		/* dimension of addr (1..3) ... */
+	u32 counter;		/* sdma pkts queued counter for this entry */
+	u64 added;		/* global descq number of entries */
+
+	struct {
+		u32 offset;			/* offset for kvaddr, addr */
+		u32 length;			/* length in page */
+		u8  put_page;			/* should we put_page? */
+		u8  dma_mapped;			/* is page dma_mapped? */
+		struct page *page;		/* may be NULL (coherent mem) */
+		void *kvaddr;			/* FIXME: only for pio hack */
+		dma_addr_t addr;
+	} addr[4];   /* max pages, any more and we coalesce */
+	struct list_head list;	/* list element */
+};
+
+struct ipath_user_sdma_queue {
+	/*
+	 * pkts sent to dma engine are queued on this
+	 * list head.  the type of the elements of this
+	 * list are struct ipath_user_sdma_pkt...
+	 */
+	struct list_head sent;
+
+	/* headers with expected length are allocated from here... */
+	char header_cache_name[64];
+	struct dma_pool *header_cache;
+
+	/* packets are allocated from the slab cache... */
+	char pkt_slab_name[64];
+	struct kmem_cache *pkt_slab;
+
+	/* as packets go on the queued queue, they are counted... */
+	u32 counter;
+	u32 sent_counter;
+
+	/* dma page table */
+	struct rb_root dma_pages_root;
+
+	/* protect everything above... */
+	struct mutex lock;
+};
+
+struct ipath_user_sdma_queue *
+ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
+{
+	struct ipath_user_sdma_queue *pq =
+		kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
+
+	if (!pq)
+		goto done;
+
+	pq->counter = 0;
+	pq->sent_counter = 0;
+	INIT_LIST_HEAD(&pq->sent);
+
+	mutex_init(&pq->lock);
+
+	snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
+		 "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
+	pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
+					 sizeof(struct ipath_user_sdma_pkt),
+					 0, 0, NULL);
+
+	if (!pq->pkt_slab)
+		goto err_kfree;
+
+	snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
+		 "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
+	pq->header_cache = dma_pool_create(pq->header_cache_name,
+					   dev,
+					   IPATH_USER_SDMA_EXP_HEADER_LENGTH,
+					   4, 0);
+	if (!pq->header_cache)
+		goto err_slab;
+
+	pq->dma_pages_root = RB_ROOT;
+
+	goto done;
+
+err_slab:
+	kmem_cache_destroy(pq->pkt_slab);
+err_kfree:
+	kfree(pq);
+	pq = NULL;
+
+done:
+	return pq;
+}
+
+static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
+				      int i, size_t offset, size_t len,
+				      int put_page, int dma_mapped,
+				      struct page *page,
+				      void *kvaddr, dma_addr_t dma_addr)
+{
+	pkt->addr[i].offset = offset;
+	pkt->addr[i].length = len;
+	pkt->addr[i].put_page = put_page;
+	pkt->addr[i].dma_mapped = dma_mapped;
+	pkt->addr[i].page = page;
+	pkt->addr[i].kvaddr = kvaddr;
+	pkt->addr[i].addr = dma_addr;
+}
+
+static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
+					u32 counter, size_t offset,
+					size_t len, int dma_mapped,
+					struct page *page,
+					void *kvaddr, dma_addr_t dma_addr)
+{
+	pkt->naddr = 1;
+	pkt->counter = counter;
+	ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
+				  kvaddr, dma_addr);
+}
+
+/* we've too many pages in the iovec, coalesce to a single page */
+static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
+				    struct ipath_user_sdma_pkt *pkt,
+				    const struct iovec *iov,
+				    unsigned long niov) {
+	int ret = 0;
+	struct page *page = alloc_page(GFP_KERNEL);
+	void *mpage_save;
+	char *mpage;
+	int i;
+	int len = 0;
+	dma_addr_t dma_addr;
+
+	if (!page) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	mpage = kmap(page);
+	mpage_save = mpage;
+	for (i = 0; i < niov; i++) {
+		int cfur;
+
+		cfur = copy_from_user(mpage,
+				      iov[i].iov_base, iov[i].iov_len);
+		if (cfur) {
+			ret = -EFAULT;
+			goto free_unmap;
+		}
+
+		mpage += iov[i].iov_len;
+		len += iov[i].iov_len;
+	}
+
+	dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
+				DMA_TO_DEVICE);
+	if (dma_mapping_error(dma_addr)) {
+		ret = -ENOMEM;
+		goto free_unmap;
+	}
+
+	ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
+				  dma_addr);
+	pkt->naddr = 2;
+
+	goto done;
+
+free_unmap:
+	kunmap(page);
+	__free_page(page);
+done:
+	return ret;
+}
+
+/* how many pages in this iovec element? */
+static int ipath_user_sdma_num_pages(const struct iovec *iov)
+{
+	const unsigned long addr  = (unsigned long) iov->iov_base;
+	const unsigned long  len  = iov->iov_len;
+	const unsigned long spage = addr & PAGE_MASK;
+	const unsigned long epage = (addr + len - 1) & PAGE_MASK;
+
+	return 1 + ((epage - spage) >> PAGE_SHIFT);
+}
+
+/* truncate length to page boundry */
+static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
+{
+	const unsigned long offset = addr & ~PAGE_MASK;
+
+	return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
+}
+
+static void ipath_user_sdma_free_pkt_frag(struct device *dev,
+					  struct ipath_user_sdma_queue *pq,
+					  struct ipath_user_sdma_pkt *pkt,
+					  int frag)
+{
+	const int i = frag;
+
+	if (pkt->addr[i].page) {
+		if (pkt->addr[i].dma_mapped)
+			dma_unmap_page(dev,
+				       pkt->addr[i].addr,
+				       pkt->addr[i].length,
+				       DMA_TO_DEVICE);
+
+		if (pkt->addr[i].kvaddr)
+			kunmap(pkt->addr[i].page);
+
+		if (pkt->addr[i].put_page)
+			put_page(pkt->addr[i].page);
+		else
+			__free_page(pkt->addr[i].page);
+	} else if (pkt->addr[i].kvaddr)
+		/* free coherent mem from cache... */
+		dma_pool_free(pq->header_cache,
+			      pkt->addr[i].kvaddr, pkt->addr[i].addr);
+}
+
+/* return number of pages pinned... */
+static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
+				     struct ipath_user_sdma_pkt *pkt,
+				     unsigned long addr, int tlen, int npages)
+{
+	struct page *pages[2];
+	int j;
+	int ret;
+
+	ret = get_user_pages(current, current->mm, addr,
+			     npages, 0, 1, pages, NULL);
+
+	if (ret != npages) {
+		int i;
+
+		for (i = 0; i < ret; i++)
+			put_page(pages[i]);
+
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	for (j = 0; j < npages; j++) {
+		/* map the pages... */
+		const int flen =
+			ipath_user_sdma_page_length(addr, tlen);
+		dma_addr_t dma_addr =
+			dma_map_page(&dd->pcidev->dev,
+				     pages[j], 0, flen, DMA_TO_DEVICE);
+		unsigned long fofs = addr & ~PAGE_MASK;
+
+		if (dma_mapping_error(dma_addr)) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
+					  pages[j], kmap(pages[j]),
+					  dma_addr);
+
+		pkt->naddr++;
+		addr += flen;
+		tlen -= flen;
+	}
+
+done:
+	return ret;
+}
+
+static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
+				   struct ipath_user_sdma_queue *pq,
+				   struct ipath_user_sdma_pkt *pkt,
+				   const struct iovec *iov,
+				   unsigned long niov)
+{
+	int ret = 0;
+	unsigned long idx;
+
+	for (idx = 0; idx < niov; idx++) {
+		const int npages = ipath_user_sdma_num_pages(iov + idx);
+		const unsigned long addr = (unsigned long) iov[idx].iov_base;
+
+		ret = ipath_user_sdma_pin_pages(dd, pkt,
+						addr, iov[idx].iov_len,
+						npages);
+		if (ret < 0)
+			goto free_pkt;
+	}
+
+	goto done;
+
+free_pkt:
+	for (idx = 0; idx < pkt->naddr; idx++)
+		ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+
+done:
+	return ret;
+}
+
+static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
+					struct ipath_user_sdma_queue *pq,
+					struct ipath_user_sdma_pkt *pkt,
+					const struct iovec *iov,
+					unsigned long niov, int npages)
+{
+	int ret = 0;
+
+	if (npages >= ARRAY_SIZE(pkt->addr))
+		ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
+	else
+		ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
+
+	return ret;
+}
+
+/* free a packet list -- return counter value of last packet */
+static void ipath_user_sdma_free_pkt_list(struct device *dev,
+					  struct ipath_user_sdma_queue *pq,
+					  struct list_head *list)
+{
+	struct ipath_user_sdma_pkt *pkt, *pkt_next;
+
+	list_for_each_entry_safe(pkt, pkt_next, list, list) {
+		int i;
+
+		for (i = 0; i < pkt->naddr; i++)
+			ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
+
+		kmem_cache_free(pq->pkt_slab, pkt);
+	}
+}
+
+/*
+ * copy headers, coalesce etc -- pq->lock must be held
+ *
+ * we queue all the packets to list, returning the
+ * number of bytes total.  list must be empty initially,
+ * as, if there is an error we clean it...
+ */
+static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
+				      struct ipath_user_sdma_queue *pq,
+				      struct list_head *list,
+				      const struct iovec *iov,
+				      unsigned long niov,
+				      int maxpkts)
+{
+	unsigned long idx = 0;
+	int ret = 0;
+	int npkts = 0;
+	struct page *page = NULL;
+	__le32 *pbc;
+	dma_addr_t dma_addr;
+	struct ipath_user_sdma_pkt *pkt = NULL;
+	size_t len;
+	size_t nw;
+	u32 counter = pq->counter;
+	int dma_mapped = 0;
+
+	while (idx < niov && npkts < maxpkts) {
+		const unsigned long addr = (unsigned long) iov[idx].iov_base;
+		const unsigned long idx_save = idx;
+		unsigned pktnw;
+		unsigned pktnwc;
+		int nfrags = 0;
+		int npages = 0;
+		int cfur;
+
+		dma_mapped = 0;
+		len = iov[idx].iov_len;
+		nw = len >> 2;
+		page = NULL;
+
+		pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+		if (!pkt) {
+			ret = -ENOMEM;
+			goto free_list;
+		}
+
+		if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
+		    len > PAGE_SIZE || len & 3 || addr & 3) {
+			ret = -EINVAL;
+			goto free_pkt;
+		}
+
+		if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
+			pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+					     &dma_addr);
+		else
+			pbc = NULL;
+
+		if (!pbc) {
+			page = alloc_page(GFP_KERNEL);
+			if (!page) {
+				ret = -ENOMEM;
+				goto free_pkt;
+			}
+			pbc = kmap(page);
+		}
+
+		cfur = copy_from_user(pbc, iov[idx].iov_base, len);
+		if (cfur) {
+			ret = -EFAULT;
+			goto free_pbc;
+		}
+
+		/*
+		 * this assignment is a bit strange.  it's because the
+		 * the pbc counts the number of 32 bit words in the full
+		 * packet _except_ the first word of the pbc itself...
+		 */
+		pktnwc = nw - 1;
+
+		/*
+		 * pktnw computation yields the number of 32 bit words
+		 * that the caller has indicated in the PBC.  note that
+		 * this is one less than the total number of words that
+		 * goes to the send DMA engine as the first 32 bit word
+		 * of the PBC itself is not counted.  Armed with this count,
+		 * we can verify that the packet is consistent with the
+		 * iovec lengths.
+		 */
+		pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
+		if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+			ret = -EINVAL;
+			goto free_pbc;
+		}
+
+
+		idx++;
+		while (pktnwc < pktnw && idx < niov) {
+			const size_t slen = iov[idx].iov_len;
+			const unsigned long faddr =
+				(unsigned long) iov[idx].iov_base;
+
+			if (slen & 3 || faddr & 3 || !slen ||
+			    slen > PAGE_SIZE) {
+				ret = -EINVAL;
+				goto free_pbc;
+			}
+
+			npages++;
+			if ((faddr & PAGE_MASK) !=
+			    ((faddr + slen - 1) & PAGE_MASK))
+				npages++;
+
+			pktnwc += slen >> 2;
+			idx++;
+			nfrags++;
+		}
+
+		if (pktnwc != pktnw) {
+			ret = -EINVAL;
+			goto free_pbc;
+		}
+
+		if (page) {
+			dma_addr = dma_map_page(&dd->pcidev->dev,
+						page, 0, len, DMA_TO_DEVICE);
+			if (dma_mapping_error(dma_addr)) {
+				ret = -ENOMEM;
+				goto free_pbc;
+			}
+
+			dma_mapped = 1;
+		}
+
+		ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
+					    page, pbc, dma_addr);
+
+		if (nfrags) {
+			ret = ipath_user_sdma_init_payload(dd, pq, pkt,
+							   iov + idx_save + 1,
+							   nfrags, npages);
+			if (ret < 0)
+				goto free_pbc_dma;
+		}
+
+		counter++;
+		npkts++;
+
+		list_add_tail(&pkt->list, list);
+	}
+
+	ret = idx;
+	goto done;
+
+free_pbc_dma:
+	if (dma_mapped)
+		dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
+free_pbc:
+	if (page) {
+		kunmap(page);
+		__free_page(page);
+	} else
+		dma_pool_free(pq->header_cache, pbc, dma_addr);
+free_pkt:
+	kmem_cache_free(pq->pkt_slab, pkt);
+free_list:
+	ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
+done:
+	return ret;
+}
+
+static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
+						 u32 c)
+{
+	pq->sent_counter = c;
+}
+
+/* try to clean out queue -- needs pq->lock */
+static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
+				       struct ipath_user_sdma_queue *pq)
+{
+	struct list_head free_list;
+	struct ipath_user_sdma_pkt *pkt;
+	struct ipath_user_sdma_pkt *pkt_prev;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&free_list);
+
+	list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
+		s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
+
+		if (descd < 0)
+			break;
+
+		list_move_tail(&pkt->list, &free_list);
+
+		/* one more packet cleaned */
+		ret++;
+	}
+
+	if (!list_empty(&free_list)) {
+		u32 counter;
+
+		pkt = list_entry(free_list.prev,
+				 struct ipath_user_sdma_pkt, list);
+		counter = pkt->counter;
+
+		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+		ipath_user_sdma_set_complete_counter(pq, counter);
+	}
+
+	return ret;
+}
+
+void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
+{
+	if (!pq)
+		return;
+
+	kmem_cache_destroy(pq->pkt_slab);
+	dma_pool_destroy(pq->header_cache);
+	kfree(pq);
+}
+
+/* clean descriptor queue, returns > 0 if some elements cleaned */
+static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+	ret = ipath_sdma_make_progress(dd);
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+	return ret;
+}
+
+/* we're in close, drain packets so that we can cleanup successfully... */
+void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
+				 struct ipath_user_sdma_queue *pq)
+{
+	int i;
+
+	if (!pq)
+		return;
+
+	for (i = 0; i < 100; i++) {
+		mutex_lock(&pq->lock);
+		if (list_empty(&pq->sent)) {
+			mutex_unlock(&pq->lock);
+			break;
+		}
+		ipath_user_sdma_hwqueue_clean(dd);
+		ipath_user_sdma_queue_clean(dd, pq);
+		mutex_unlock(&pq->lock);
+		msleep(10);
+	}
+
+	if (!list_empty(&pq->sent)) {
+		struct list_head free_list;
+
+		printk(KERN_INFO "drain: lists not empty: forcing!\n");
+		INIT_LIST_HEAD(&free_list);
+		mutex_lock(&pq->lock);
+		list_splice_init(&pq->sent, &free_list);
+		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+		mutex_unlock(&pq->lock);
+	}
+}
+
+static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
+					   u64 addr, u64 dwlen, u64 dwoffset)
+{
+	return cpu_to_le64(/* SDmaPhyAddr[31:0] */
+			   ((addr & 0xfffffffcULL) << 32) |
+			   /* SDmaGeneration[1:0] */
+			   ((dd->ipath_sdma_generation & 3ULL) << 30) |
+			   /* SDmaDwordCount[10:0] */
+			   ((dwlen & 0x7ffULL) << 16) |
+			   /* SDmaBufOffset[12:2] */
+			   (dwoffset & 0x7ffULL));
+}
+
+static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
+{
+	return descq | __constant_cpu_to_le64(1ULL << 12);
+}
+
+static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
+{
+					      /* last */  /* dma head */
+	return descq | __constant_cpu_to_le64(1ULL << 11 | 1ULL << 13);
+}
+
+static inline __le64 ipath_sdma_make_desc1(u64 addr)
+{
+	/* SDmaPhyAddr[47:32] */
+	return cpu_to_le64(addr >> 32);
+}
+
+static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
+				      struct ipath_user_sdma_pkt *pkt, int idx,
+				      unsigned ofs, u16 tail)
+{
+	const u64 addr = (u64) pkt->addr[idx].addr +
+		(u64) pkt->addr[idx].offset;
+	const u64 dwlen = (u64) pkt->addr[idx].length / 4;
+	__le64 *descqp;
+	__le64 descq0;
+
+	descqp = &dd->ipath_sdma_descq[tail].qw[0];
+
+	descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
+	if (idx == 0)
+		descq0 = ipath_sdma_make_first_desc0(descq0);
+	if (idx == pkt->naddr - 1)
+		descq0 = ipath_sdma_make_last_desc0(descq0);
+
+	descqp[0] = descq0;
+	descqp[1] = ipath_sdma_make_desc1(addr);
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
+				     struct ipath_user_sdma_queue *pq,
+				     struct list_head *pktlist)
+{
+	int ret = 0;
+	unsigned long flags;
+	u16 tail;
+
+	if (list_empty(pktlist))
+		return 0;
+
+	if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
+		return -ECOMM;
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+	if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
+		ret = -ECOMM;
+		goto unlock;
+	}
+
+	tail = dd->ipath_sdma_descq_tail;
+	while (!list_empty(pktlist)) {
+		struct ipath_user_sdma_pkt *pkt =
+			list_entry(pktlist->next, struct ipath_user_sdma_pkt,
+				   list);
+		int i;
+		unsigned ofs = 0;
+		u16 dtail = tail;
+
+		if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
+			goto unlock_check_tail;
+
+		for (i = 0; i < pkt->naddr; i++) {
+			ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
+			ofs += pkt->addr[i].length >> 2;
+
+			if (++tail == dd->ipath_sdma_descq_cnt) {
+				tail = 0;
+				++dd->ipath_sdma_generation;
+			}
+		}
+
+		if ((ofs<<2) > dd->ipath_ibmaxlen) {
+			ipath_dbg("packet size %X > ibmax %X, fail\n",
+				ofs<<2, dd->ipath_ibmaxlen);
+			ret = -EMSGSIZE;
+			goto unlock;
+		}
+
+		/*
+		 * if the packet is >= 2KB mtu equivalent, we have to use
+		 * the large buffers, and have to mark each descriptor as
+		 * part of a large buffer packet.
+		 */
+		if (ofs >= IPATH_SMALLBUF_DWORDS) {
+			for (i = 0; i < pkt->naddr; i++) {
+				dd->ipath_sdma_descq[dtail].qw[0] |=
+					__constant_cpu_to_le64(1ULL << 14);
+				if (++dtail == dd->ipath_sdma_descq_cnt)
+					dtail = 0;
+			}
+		}
+
+		dd->ipath_sdma_descq_added += pkt->naddr;
+		pkt->added = dd->ipath_sdma_descq_added;
+		list_move_tail(&pkt->list, &pq->sent);
+		ret++;
+	}
+
+unlock_check_tail:
+	/* advance the tail on the chip if necessary */
+	if (dd->ipath_sdma_descq_tail != tail) {
+		wmb();
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
+		dd->ipath_sdma_descq_tail = tail;
+	}
+
+unlock:
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+	return ret;
+}
+
+int ipath_user_sdma_writev(struct ipath_devdata *dd,
+			   struct ipath_user_sdma_queue *pq,
+			   const struct iovec *iov,
+			   unsigned long dim)
+{
+	int ret = 0;
+	struct list_head list;
+	int npkts = 0;
+
+	INIT_LIST_HEAD(&list);
+
+	mutex_lock(&pq->lock);
+
+	if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
+		ipath_user_sdma_hwqueue_clean(dd);
+		ipath_user_sdma_queue_clean(dd, pq);
+	}
+
+	while (dim) {
+		const int mxp = 8;
+
+		down_write(&current->mm->mmap_sem);
+		ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+		up_write(&current->mm->mmap_sem);
+
+		if (ret <= 0)
+			goto done_unlock;
+		else {
+			dim -= ret;
+			iov += ret;
+		}
+
+		/* force packets onto the sdma hw queue... */
+		if (!list_empty(&list)) {
+			/*
+			 * lazily clean hw queue.  the 4 is a guess of about
+			 * how many sdma descriptors a packet will take (it
+			 * doesn't have to be perfect).
+			 */
+			if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
+				ipath_user_sdma_hwqueue_clean(dd);
+				ipath_user_sdma_queue_clean(dd, pq);
+			}
+
+			ret = ipath_user_sdma_push_pkts(dd, pq, &list);
+			if (ret < 0)
+				goto done_unlock;
+			else {
+				npkts += ret;
+				pq->counter += ret;
+
+				if (!list_empty(&list))
+					goto done_unlock;
+			}
+		}
+	}
+
+done_unlock:
+	if (!list_empty(&list))
+		ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
+	mutex_unlock(&pq->lock);
+
+	return (ret < 0) ? ret : npkts;
+}
+
+int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
+				  struct ipath_user_sdma_queue *pq)
+{
+	int ret = 0;
+
+	mutex_lock(&pq->lock);
+	ipath_user_sdma_hwqueue_clean(dd);
+	ret = ipath_user_sdma_queue_clean(dd, pq);
+	mutex_unlock(&pq->lock);
+
+	return ret;
+}
+
+u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
+{
+	return pq->sent_counter;
+}
+
+u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
+{
+	return pq->counter;
+}
+
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
new file mode 100644
index 0000000..e70946c
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/device.h>
+
+struct ipath_user_sdma_queue;
+
+struct ipath_user_sdma_queue *
+ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
+void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
+
+int ipath_user_sdma_writev(struct ipath_devdata *dd,
+			   struct ipath_user_sdma_queue *pq,
+			   const struct iovec *iov,
+			   unsigned long dim);
+
+int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
+				  struct ipath_user_sdma_queue *pq);
+
+int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq,
+			     u32 counter);
+void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
+				 struct ipath_user_sdma_queue *pq);
+
+u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
+u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 32d8f88..320a6d0 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -242,6 +242,93 @@
 	ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
 }
 
+/*
+ * Count the number of DMA descriptors needed to send length bytes of data.
+ * Don't modify the ipath_sge_state to get the count.
+ * Return zero if any of the segments is not aligned.
+ */
+static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
+{
+	struct ipath_sge *sg_list = ss->sg_list;
+	struct ipath_sge sge = ss->sge;
+	u8 num_sge = ss->num_sge;
+	u32 ndesc = 1;	/* count the header */
+
+	while (length) {
+		u32 len = sge.length;
+
+		if (len > length)
+			len = length;
+		if (len > sge.sge_length)
+			len = sge.sge_length;
+		BUG_ON(len == 0);
+		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
+		    (len != length && (len & (sizeof(u32) - 1)))) {
+			ndesc = 0;
+			break;
+		}
+		ndesc++;
+		sge.vaddr += len;
+		sge.length -= len;
+		sge.sge_length -= len;
+		if (sge.sge_length == 0) {
+			if (--num_sge)
+				sge = *sg_list++;
+		} else if (sge.length == 0 && sge.mr != NULL) {
+			if (++sge.n >= IPATH_SEGSZ) {
+				if (++sge.m >= sge.mr->mapsz)
+					break;
+				sge.n = 0;
+			}
+			sge.vaddr =
+				sge.mr->map[sge.m]->segs[sge.n].vaddr;
+			sge.length =
+				sge.mr->map[sge.m]->segs[sge.n].length;
+		}
+		length -= len;
+	}
+	return ndesc;
+}
+
+/*
+ * Copy from the SGEs to the data buffer.
+ */
+static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
+				u32 length)
+{
+	struct ipath_sge *sge = &ss->sge;
+
+	while (length) {
+		u32 len = sge->length;
+
+		if (len > length)
+			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		memcpy(data, sge->vaddr, len);
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (--ss->num_sge)
+				*sge = *ss->sg_list++;
+		} else if (sge->length == 0 && sge->mr != NULL) {
+			if (++sge->n >= IPATH_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		data += len;
+		length -= len;
+	}
+}
+
 /**
  * ipath_post_one_send - post one RC, UC, or UD send work request
  * @qp: the QP to post on
@@ -866,27 +953,257 @@
 		__raw_writel(last, piobuf);
 }
 
-static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords,
+/*
+ * Convert IB rate to delay multiplier.
+ */
+unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
+{
+	switch (rate) {
+	case IB_RATE_2_5_GBPS: return 8;
+	case IB_RATE_5_GBPS:   return 4;
+	case IB_RATE_10_GBPS:  return 2;
+	case IB_RATE_20_GBPS:  return 1;
+	default:	       return 0;
+	}
+}
+
+/*
+ * Convert delay multiplier to IB rate
+ */
+static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
+{
+	switch (mult) {
+	case 8:  return IB_RATE_2_5_GBPS;
+	case 4:  return IB_RATE_5_GBPS;
+	case 2:  return IB_RATE_10_GBPS;
+	case 1:  return IB_RATE_20_GBPS;
+	default: return IB_RATE_PORT_CURRENT;
+	}
+}
+
+static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
+{
+	struct ipath_verbs_txreq *tx = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->pending_lock, flags);
+	if (!list_empty(&dev->txreq_free)) {
+		struct list_head *l = dev->txreq_free.next;
+
+		list_del(l);
+		tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
+	}
+	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	return tx;
+}
+
+static inline void put_txreq(struct ipath_ibdev *dev,
+			     struct ipath_verbs_txreq *tx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->pending_lock, flags);
+	list_add(&tx->txreq.list, &dev->txreq_free);
+	spin_unlock_irqrestore(&dev->pending_lock, flags);
+}
+
+static void sdma_complete(void *cookie, int status)
+{
+	struct ipath_verbs_txreq *tx = cookie;
+	struct ipath_qp *qp = tx->qp;
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+
+	/* Generate a completion queue entry if needed */
+	if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) {
+		enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
+			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
+
+		ipath_send_complete(qp, tx->wqe, ibs);
+	}
+
+	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
+		kfree(tx->txreq.map_addr);
+	put_txreq(dev, tx);
+
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+/*
+ * Compute the number of clock cycles of delay before sending the next packet.
+ * The multipliers reflect the number of clocks for the fastest rate so
+ * one tick at 4xDDR is 8 ticks at 1xSDR.
+ * If the destination port will take longer to receive a packet than
+ * the outgoing link can send it, we need to delay sending the next packet
+ * by the difference in time it takes the receiver to receive and the sender
+ * to send this packet.
+ * Note that this delay is always correct for UC and RC but not always
+ * optimal for UD. For UD, the destination HCA can be different for each
+ * packet, in which case, we could send packets to a different destination
+ * while "waiting" for the delay. The overhead for doing this without
+ * HW support is more than just paying the cost of delaying some packets
+ * unnecessarily.
+ */
+static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
+{
+	return (rcv_mult > snd_mult) ?
+		(plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
+}
+
+static int ipath_verbs_send_dma(struct ipath_qp *qp,
+				struct ipath_ib_header *hdr, u32 hdrwords,
+				struct ipath_sge_state *ss, u32 len,
+				u32 plen, u32 dwords)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	struct ipath_devdata *dd = dev->dd;
+	struct ipath_verbs_txreq *tx;
+	u32 *piobuf;
+	u32 control;
+	u32 ndesc;
+	int ret;
+
+	tx = qp->s_tx;
+	if (tx) {
+		qp->s_tx = NULL;
+		/* resend previously constructed packet */
+		ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
+		if (ret)
+			qp->s_tx = tx;
+		goto bail;
+	}
+
+	tx = get_txreq(dev);
+	if (!tx) {
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	/*
+	 * Get the saved delay count we computed for the previous packet
+	 * and save the delay count for this packet to be used next time
+	 * we get here.
+	 */
+	control = qp->s_pkt_delay;
+	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
+
+	tx->qp = qp;
+	atomic_inc(&qp->refcount);
+	tx->wqe = qp->s_wqe;
+	tx->txreq.callback = sdma_complete;
+	tx->txreq.callback_cookie = tx;
+	tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
+		IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
+	if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
+		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
+
+	/* VL15 packets bypass credit check */
+	if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
+		control |= 1ULL << 31;
+		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
+	}
+
+	if (len) {
+		/*
+		 * Don't try to DMA if it takes more descriptors than
+		 * the queue holds.
+		 */
+		ndesc = ipath_count_sge(ss, len);
+		if (ndesc >= dd->ipath_sdma_descq_cnt)
+			ndesc = 0;
+	} else
+		ndesc = 1;
+	if (ndesc) {
+		tx->hdr.pbc[0] = cpu_to_le32(plen);
+		tx->hdr.pbc[1] = cpu_to_le32(control);
+		memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
+		tx->txreq.sg_count = ndesc;
+		tx->map_len = (hdrwords + 2) << 2;
+		tx->txreq.map_addr = &tx->hdr;
+		ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
+		if (ret) {
+			/* save ss and length in dwords */
+			tx->ss = ss;
+			tx->len = dwords;
+			qp->s_tx = tx;
+		}
+		goto bail;
+	}
+
+	/* Allocate a buffer and copy the header and payload to it. */
+	tx->map_len = (plen + 1) << 2;
+	piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
+	if (unlikely(piobuf == NULL)) {
+		ret = -EBUSY;
+		goto err_tx;
+	}
+	tx->txreq.map_addr = piobuf;
+	tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
+	tx->txreq.sg_count = 1;
+
+	*piobuf++ = (__force u32) cpu_to_le32(plen);
+	*piobuf++ = (__force u32) cpu_to_le32(control);
+	memcpy(piobuf, hdr, hdrwords << 2);
+	ipath_copy_from_sge(piobuf + hdrwords, ss, len);
+
+	ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
+	/*
+	 * If we couldn't queue the DMA request, save the info
+	 * and try again later rather than destroying the
+	 * buffer and undoing the side effects of the copy.
+	 */
+	if (ret) {
+		tx->ss = NULL;
+		tx->len = 0;
+		qp->s_tx = tx;
+	}
+	dev->n_unaligned++;
+	goto bail;
+
+err_tx:
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+	put_txreq(dev, tx);
+bail:
+	return ret;
+}
+
+static int ipath_verbs_send_pio(struct ipath_qp *qp,
+				struct ipath_ib_header *ibhdr, u32 hdrwords,
 				struct ipath_sge_state *ss, u32 len,
 				u32 plen, u32 dwords)
 {
 	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+	u32 *hdr = (u32 *) ibhdr;
 	u32 __iomem *piobuf;
 	unsigned flush_wc;
+	u32 control;
 	int ret;
 
-	piobuf = ipath_getpiobuf(dd, NULL);
+	piobuf = ipath_getpiobuf(dd, plen, NULL);
 	if (unlikely(piobuf == NULL)) {
 		ret = -EBUSY;
 		goto bail;
 	}
 
 	/*
-	 * Write len to control qword, no flags.
+	 * Get the saved delay count we computed for the previous packet
+	 * and save the delay count for this packet to be used next time
+	 * we get here.
+	 */
+	control = qp->s_pkt_delay;
+	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
+
+	/* VL15 packets bypass credit check */
+	if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
+		control |= 1ULL << 31;
+
+	/*
+	 * Write the length to the control qword plus any needed flags.
 	 * We have to flush after the PBC for correctness on some cpus
 	 * or WC buffer can be written out of order.
 	 */
-	writeq(plen, piobuf);
+	writeq(((u64) control << 32) | plen, piobuf);
 	piobuf += 2;
 
 	flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
@@ -961,15 +1278,25 @@
 	 */
 	plen = hdrwords + dwords + 1;
 
-	/* Drop non-VL15 packets if we are not in the active state */
-	if (!(dd->ipath_flags & IPATH_LINKACTIVE) &&
-	    qp->ibqp.qp_type != IB_QPT_SMI) {
+	/*
+	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
+	 * can defer SDMA restart until link goes ACTIVE without
+	 * worrying about just how we got there.
+	 */
+	if (qp->ibqp.qp_type == IB_QPT_SMI)
+		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+					   plen, dwords);
+	/* All non-VL15 packets are dropped if link is not ACTIVE */
+	else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) {
 		if (qp->s_wqe)
 			ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
 		ret = 0;
-	} else
-		ret = ipath_verbs_send_pio(qp, (u32 *) hdr, hdrwords,
-					   ss, len, plen, dwords);
+	} else if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
+					   plen, dwords);
+	else
+		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+					   plen, dwords);
 
 	return ret;
 }
@@ -1038,6 +1365,12 @@
 		ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
 		ipath_snap_cntr(dd, crp->cr_badformatcnt) +
 		dd->ipath_rxfc_unsupvl_errs;
+	if (crp->cr_rxotherlocalphyerrcnt)
+		cntrs->port_rcv_errors +=
+			ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
+	if (crp->cr_rxvlerrcnt)
+		cntrs->port_rcv_errors +=
+			ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
 	cntrs->port_rcv_remphys_errors =
 		ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
 	cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
@@ -1046,9 +1379,16 @@
 	cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
 	cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
 	cntrs->local_link_integrity_errors =
-		(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-		dd->ipath_lli_errs : dd->ipath_lli_errors;
-	cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;
+		crp->cr_locallinkintegrityerrcnt ?
+		ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
+		((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+		 dd->ipath_lli_errs : dd->ipath_lli_errors);
+	cntrs->excessive_buffer_overrun_errors =
+		crp->cr_excessbufferovflcnt ?
+		ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
+		dd->ipath_overrun_thresh_errs;
+	cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
+		ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
 
 	ret = 0;
 
@@ -1183,7 +1523,9 @@
 	props->sm_lid = dev->sm_lid;
 	props->sm_sl = dev->sm_sl;
 	ibcstat = dd->ipath_lastibcstat;
-	props->state = ((ibcstat >> 4) & 0x3) + 1;
+	/* map LinkState to IB portinfo values.  */
+	props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
+
 	/* See phys_state_show() */
 	props->phys_state = /* MEA: assumes shift == 0 */
 		ipath_cvt_physportstate[dd->ipath_lastibcstat &
@@ -1195,18 +1537,13 @@
 	props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
 		dev->z_pkey_violations;
 	props->qkey_viol_cntr = dev->qkey_violations;
-	props->active_width = IB_WIDTH_4X;
+	props->active_width = dd->ipath_link_width_active;
 	/* See rate_show() */
-	props->active_speed = 1;	/* Regular 10Mbs speed. */
+	props->active_speed = dd->ipath_link_speed_active;
 	props->max_vl_num = 1;		/* VLCap = VL0 */
 	props->init_type_reply = 0;
 
-	/*
-	 * Note: the chip supports a maximum MTU of 4096, but the driver
-	 * hasn't implemented this feature yet, so set the maximum value
-	 * to 2048.
-	 */
-	props->max_mtu = IB_MTU_2048;
+	props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
 	switch (dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
@@ -1399,6 +1736,7 @@
 
 	/* ib_create_ah() will initialize ah->ibah. */
 	ah->attr = *ah_attr;
+	ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
 
 	ret = &ah->ibah;
 
@@ -1432,6 +1770,7 @@
 	struct ipath_ah *ah = to_iah(ibah);
 
 	*ah_attr = ah->attr;
+	ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
 
 	return 0;
 }
@@ -1581,6 +1920,8 @@
 	struct ipath_verbs_counters cntrs;
 	struct ipath_ibdev *idev;
 	struct ib_device *dev;
+	struct ipath_verbs_txreq *tx;
+	unsigned i;
 	int ret;
 
 	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
@@ -1591,6 +1932,17 @@
 
 	dev = &idev->ibdev;
 
+	if (dd->ipath_sdma_descq_cnt) {
+		tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
+			     GFP_KERNEL);
+		if (tx == NULL) {
+			ret = -ENOMEM;
+			goto err_tx;
+		}
+	} else
+		tx = NULL;
+	idev->txreq_bufs = tx;
+
 	/* Only need to initialize non-zero fields. */
 	spin_lock_init(&idev->n_pds_lock);
 	spin_lock_init(&idev->n_ahs_lock);
@@ -1631,15 +1983,17 @@
 	INIT_LIST_HEAD(&idev->pending[2]);
 	INIT_LIST_HEAD(&idev->piowait);
 	INIT_LIST_HEAD(&idev->rnrwait);
+	INIT_LIST_HEAD(&idev->txreq_free);
 	idev->pending_index = 0;
 	idev->port_cap_flags =
 		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
+	if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
+		idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
 	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
 	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
 	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
 	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
 	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-	idev->link_width_enabled = 3;	/* 1x or 4x */
 
 	/* Snapshot current HW counters to "clear" them. */
 	ipath_get_counters(dd, &cntrs);
@@ -1661,6 +2015,9 @@
 		cntrs.excessive_buffer_overrun_errors;
 	idev->z_vl15_dropped = cntrs.vl15_dropped;
 
+	for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
+		list_add(&tx->txreq.list, &idev->txreq_free);
+
 	/*
 	 * The system image GUID is supposed to be the same for all
 	 * IB HCAs in a single system but since there can be other
@@ -1710,6 +2067,7 @@
 	dev->phys_port_cnt = 1;
 	dev->num_comp_vectors = 1;
 	dev->dma_device = &dd->pcidev->dev;
+	dev->class_dev.dev = dev->dma_device;
 	dev->query_device = ipath_query_device;
 	dev->modify_device = ipath_modify_device;
 	dev->query_port = ipath_query_port;
@@ -1774,6 +2132,8 @@
 err_lk:
 	kfree(idev->qp_table.table);
 err_qp:
+	kfree(idev->txreq_bufs);
+err_tx:
 	ib_dealloc_device(dev);
 	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
 	idev = NULL;
@@ -1808,6 +2168,7 @@
 	ipath_free_all_qps(&dev->qp_table);
 	kfree(dev->qp_table.table);
 	kfree(dev->lk_table.table);
+	kfree(dev->txreq_bufs);
 	ib_dealloc_device(ibdev);
 }
 
@@ -1855,13 +2216,15 @@
 		      "RC stalls   %d\n"
 		      "piobuf wait %d\n"
 		      "no piobuf   %d\n"
+		      "unaligned   %d\n"
 		      "PKT drops   %d\n"
 		      "WQE errs    %d\n",
 		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
 		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
 		      dev->n_other_naks, dev->n_timeouts,
 		      dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
-		      dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
+		      dev->n_no_piobuf, dev->n_unaligned,
+		      dev->n_pkt_drops, dev->n_wqe_errs);
 	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
 		const struct ipath_opcode_stats *si = &dev->opstats[i];
 
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 3d59736..6514aa8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -138,6 +138,11 @@
 	} u;
 } __attribute__ ((packed));
 
+struct ipath_pio_header {
+	__le32 pbc[2];
+	struct ipath_ib_header hdr;
+} __attribute__ ((packed));
+
 /*
  * There is one struct ipath_mcast for each multicast GID.
  * All attached QPs are then stored as a list of
@@ -319,6 +324,7 @@
 	struct ipath_sge *sg_list;      /* next SGE to be used if any */
 	struct ipath_sge sge;   /* progress state for the current SGE */
 	u8 num_sge;
+	u8 static_rate;
 };
 
 /*
@@ -356,6 +362,7 @@
 	struct tasklet_struct s_task;
 	struct ipath_mmap_info *ip;
 	struct ipath_sge_state *s_cur_sge;
+	struct ipath_verbs_txreq *s_tx;
 	struct ipath_sge_state s_sge;	/* current send request data */
 	struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
 	struct ipath_sge_state s_ack_rdma_sge;
@@ -363,7 +370,8 @@
 	struct ipath_sge_state r_sge;	/* current receive data */
 	spinlock_t s_lock;
 	unsigned long s_busy;
-	u32 s_hdrwords;		/* size of s_hdr in 32 bit words */
+	u16 s_pkt_delay;
+	u16 s_hdrwords;		/* size of s_hdr in 32 bit words */
 	u32 s_cur_size;		/* size of send packet in bytes */
 	u32 s_len;		/* total length of s_sge */
 	u32 s_rdma_read_len;	/* total length of s_rdma_read_sge */
@@ -387,7 +395,6 @@
 	u8 r_nak_state;		/* non-zero if NAK is pending */
 	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */
 	u8 r_reuse_sge;		/* for UC receive errors */
-	u8 r_sge_inx;		/* current index into sg_list */
 	u8 r_wrid_valid;	/* r_wrid set but CQ entry not yet made */
 	u8 r_max_rd_atomic;	/* max number of RDMA read/atomic to receive */
 	u8 r_head_ack_queue;	/* index into s_ack_queue[] */
@@ -403,6 +410,7 @@
 	u8 s_num_rd_atomic;	/* number of RDMA read/atomic pending */
 	u8 s_tail_ack_queue;	/* index into s_ack_queue[] */
 	u8 s_flags;
+	u8 s_dmult;
 	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
 	u32 remote_qpn;
@@ -510,6 +518,8 @@
 	struct ipath_lkey_table lk_table;
 	struct list_head pending[3];	/* FIFO of QPs waiting for ACKs */
 	struct list_head piowait;	/* list for wait PIO buf */
+	struct list_head txreq_free;
+	void *txreq_bufs;
 	/* list of QPs waiting for RNR timer */
 	struct list_head rnrwait;
 	spinlock_t pending_lock;
@@ -570,6 +580,7 @@
 	u32 n_rdma_dup_busy;
 	u32 n_piowait;
 	u32 n_no_piobuf;
+	u32 n_unaligned;
 	u32 port_cap_flags;
 	u32 pma_sample_start;
 	u32 pma_sample_interval;
@@ -581,7 +592,6 @@
 	u16 pending_index;	/* which pending queue is active */
 	u8 pma_sample_status;
 	u8 subnet_timeout;
-	u8 link_width_enabled;
 	u8 vl_high_limit;
 	struct ipath_opcode_stats opstats[128];
 };
@@ -602,6 +612,16 @@
 	u32 vl15_dropped;
 };
 
+struct ipath_verbs_txreq {
+	struct ipath_qp         *qp;
+	struct ipath_swqe       *wqe;
+	u32                      map_len;
+	u32                      len;
+	struct ipath_sge_state  *ss;
+	struct ipath_pio_header  hdr;
+	struct ipath_sdma_txreq  txreq;
+};
+
 static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
 {
 	return container_of(ibmr, struct ipath_mr, ibmr);
@@ -694,11 +714,11 @@
 
 void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
 
+unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
+
 int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
 		     u32 hdrwords, struct ipath_sge_state *ss, u32 len);
 
-void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
-
 void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
 
 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7360bba..3557e7e 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -85,6 +85,82 @@
 	return get_sw_cqe(cq, cq->mcq.cons_index);
 }
 
+int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+	struct mlx4_ib_cq *mcq = to_mcq(cq);
+	struct mlx4_ib_dev *dev = to_mdev(cq->device);
+
+	return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period);
+}
+
+static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent)
+{
+	int err;
+
+	err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+			     PAGE_SIZE * 2, &buf->buf);
+
+	if (err)
+		goto out;
+
+	err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
+				    &buf->mtt);
+	if (err)
+		goto err_buf;
+
+	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+	if (err)
+		goto err_mtt;
+
+	return 0;
+
+err_mtt:
+	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
+
+err_buf:
+	mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
+			      &buf->buf);
+
+out:
+	return err;
+}
+
+static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
+{
+	mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+}
+
+static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
+			       struct mlx4_ib_cq_buf *buf, struct ib_umem **umem,
+			       u64 buf_addr, int cqe)
+{
+	int err;
+
+	*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+			    IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(*umem))
+		return PTR_ERR(*umem);
+
+	err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
+			    ilog2((*umem)->page_size), &buf->mtt);
+	if (err)
+		goto err_buf;
+
+	err = mlx4_ib_umem_write_mtt(dev, &buf->mtt, *umem);
+	if (err)
+		goto err_mtt;
+
+	return 0;
+
+err_mtt:
+	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
+
+err_buf:
+	ib_umem_release(*umem);
+
+	return err;
+}
+
 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
 				struct ib_ucontext *context,
 				struct ib_udata *udata)
@@ -92,7 +168,6 @@
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_cq *cq;
 	struct mlx4_uar *uar;
-	int buf_size;
 	int err;
 
 	if (entries < 1 || entries > dev->dev->caps.max_cqes)
@@ -104,8 +179,10 @@
 
 	entries      = roundup_pow_of_two(entries + 1);
 	cq->ibcq.cqe = entries - 1;
-	buf_size     = entries * sizeof (struct mlx4_cqe);
+	mutex_init(&cq->resize_mutex);
 	spin_lock_init(&cq->lock);
+	cq->resize_buf = NULL;
+	cq->resize_umem = NULL;
 
 	if (context) {
 		struct mlx4_ib_create_cq ucmd;
@@ -115,21 +192,10 @@
 			goto err_cq;
 		}
 
-		cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
-				       IB_ACCESS_LOCAL_WRITE);
-		if (IS_ERR(cq->umem)) {
-			err = PTR_ERR(cq->umem);
+		err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
+					  ucmd.buf_addr, entries);
+		if (err)
 			goto err_cq;
-		}
-
-		err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
-				    ilog2(cq->umem->page_size), &cq->buf.mtt);
-		if (err)
-			goto err_buf;
-
-		err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
-		if (err)
-			goto err_mtt;
 
 		err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
 					  &cq->db);
@@ -147,19 +213,9 @@
 		*cq->mcq.set_ci_db = 0;
 		*cq->mcq.arm_db    = 0;
 
-		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
-			err = -ENOMEM;
+		err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries);
+		if (err)
 			goto err_db;
-		}
-
-		err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
-				    &cq->buf.mtt);
-		if (err)
-			goto err_buf;
-
-		err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
-		if (err)
-			goto err_mtt;
 
 		uar = &dev->priv_uar;
 	}
@@ -187,12 +243,10 @@
 err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
 
-err_buf:
 	if (context)
 		ib_umem_release(cq->umem);
 	else
-		mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
-			      &cq->buf.buf);
+		mlx4_ib_free_cq_buf(dev, &cq->buf, entries);
 
 err_db:
 	if (!context)
@@ -204,6 +258,170 @@
 	return ERR_PTR(err);
 }
 
+static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
+				  int entries)
+{
+	int err;
+
+	if (cq->resize_buf)
+		return -EBUSY;
+
+	cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+	if (!cq->resize_buf)
+		return -ENOMEM;
+
+	err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
+	if (err) {
+		kfree(cq->resize_buf);
+		cq->resize_buf = NULL;
+		return err;
+	}
+
+	cq->resize_buf->cqe = entries - 1;
+
+	return 0;
+}
+
+static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
+				   int entries, struct ib_udata *udata)
+{
+	struct mlx4_ib_resize_cq ucmd;
+	int err;
+
+	if (cq->resize_umem)
+		return -EBUSY;
+
+	if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+		return -EFAULT;
+
+	cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+	if (!cq->resize_buf)
+		return -ENOMEM;
+
+	err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf,
+				  &cq->resize_umem, ucmd.buf_addr, entries);
+	if (err) {
+		kfree(cq->resize_buf);
+		cq->resize_buf = NULL;
+		return err;
+	}
+
+	cq->resize_buf->cqe = entries - 1;
+
+	return 0;
+}
+
+static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
+{
+	u32 i;
+
+	i = cq->mcq.cons_index;
+	while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+		++i;
+
+	return i - cq->mcq.cons_index;
+}
+
+static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
+{
+	struct mlx4_cqe *cqe;
+	int i;
+
+	i = cq->mcq.cons_index;
+	cqe = get_cqe(cq, i & cq->ibcq.cqe);
+	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
+		memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
+					(i + 1) & cq->resize_buf->cqe),
+			get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+	}
+	++cq->mcq.cons_index;
+}
+
+int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
+	struct mlx4_ib_cq *cq = to_mcq(ibcq);
+	int outst_cqe;
+	int err;
+
+	mutex_lock(&cq->resize_mutex);
+
+	if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	entries = roundup_pow_of_two(entries + 1);
+	if (entries == ibcq->cqe + 1) {
+		err = 0;
+		goto out;
+	}
+
+	if (ibcq->uobject) {
+		err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
+		if (err)
+			goto out;
+	} else {
+		/* Can't be smaller then the number of outstanding CQEs */
+		outst_cqe = mlx4_ib_get_outstanding_cqes(cq);
+		if (entries < outst_cqe + 1) {
+			err = 0;
+			goto out;
+		}
+
+		err = mlx4_alloc_resize_buf(dev, cq, entries);
+		if (err)
+			goto out;
+	}
+
+	err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt);
+	if (err)
+		goto err_buf;
+
+	if (ibcq->uobject) {
+		cq->buf      = cq->resize_buf->buf;
+		cq->ibcq.cqe = cq->resize_buf->cqe;
+		ib_umem_release(cq->umem);
+		cq->umem     = cq->resize_umem;
+
+		kfree(cq->resize_buf);
+		cq->resize_buf = NULL;
+		cq->resize_umem = NULL;
+	} else {
+		spin_lock_irq(&cq->lock);
+		if (cq->resize_buf) {
+			mlx4_ib_cq_resize_copy_cqes(cq);
+			mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+			cq->buf      = cq->resize_buf->buf;
+			cq->ibcq.cqe = cq->resize_buf->cqe;
+
+			kfree(cq->resize_buf);
+			cq->resize_buf = NULL;
+		}
+		spin_unlock_irq(&cq->lock);
+	}
+
+	goto out;
+
+err_buf:
+	if (!ibcq->uobject)
+		mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf,
+				    cq->resize_buf->cqe);
+
+	kfree(cq->resize_buf);
+	cq->resize_buf = NULL;
+
+	if (cq->resize_umem) {
+		ib_umem_release(cq->resize_umem);
+		cq->resize_umem = NULL;
+	}
+
+out:
+	mutex_unlock(&cq->resize_mutex);
+	return err;
+}
+
 int mlx4_ib_destroy_cq(struct ib_cq *cq)
 {
 	struct mlx4_ib_dev *dev = to_mdev(cq->device);
@@ -216,8 +434,7 @@
 		mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
 		ib_umem_release(mcq->umem);
 	} else {
-		mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
-			      &mcq->buf.buf);
+		mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1);
 		mlx4_ib_db_free(dev, &mcq->db);
 	}
 
@@ -297,6 +514,20 @@
 	wc->vendor_err = cqe->vendor_err_syndrome;
 }
 
+static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum)
+{
+	return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4	|
+				      MLX4_CQE_IPOIB_STATUS_IPV4F	|
+				      MLX4_CQE_IPOIB_STATUS_IPV4OPT	|
+				      MLX4_CQE_IPOIB_STATUS_IPV6	|
+				      MLX4_CQE_IPOIB_STATUS_IPOK)) ==
+		cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4	|
+			    MLX4_CQE_IPOIB_STATUS_IPOK))		&&
+		(status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP	|
+				      MLX4_CQE_IPOIB_STATUS_TCP))	&&
+		checksum == cpu_to_be16(0xffff);
+}
+
 static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 			    struct mlx4_ib_qp **cur_qp,
 			    struct ib_wc *wc)
@@ -310,6 +541,7 @@
 	u32 g_mlpath_rqpn;
 	u16 wqe_ctr;
 
+repoll:
 	cqe = next_cqe_sw(cq);
 	if (!cqe)
 		return -EAGAIN;
@@ -332,6 +564,22 @@
 		return -EINVAL;
 	}
 
+	/* Resize CQ in progress */
+	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
+		if (cq->resize_buf) {
+			struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device);
+
+			mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+			cq->buf      = cq->resize_buf->buf;
+			cq->ibcq.cqe = cq->resize_buf->cqe;
+
+			kfree(cq->resize_buf);
+			cq->resize_buf = NULL;
+		}
+
+		goto repoll;
+	}
+
 	if (!*cur_qp ||
 	    (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
 		/*
@@ -406,6 +654,9 @@
 		case MLX4_OPCODE_BIND_MW:
 			wc->opcode    = IB_WC_BIND_MW;
 			break;
+		case MLX4_OPCODE_LSO:
+			wc->opcode    = IB_WC_LSO;
+			break;
 		}
 	} else {
 		wc->byte_len = be32_to_cpu(cqe->byte_cnt);
@@ -434,6 +685,8 @@
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
 		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
 		wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
+		wc->csum_ok	   = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,
+							   cqe->checksum);
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 0ed02b7..4c1e72f 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -165,7 +165,7 @@
 			event.device	       = ibdev;
 			event.element.port_num = port_num;
 
-			if(pinfo->clientrereg_resv_subnetto & 0x80)
+			if (pinfo->clientrereg_resv_subnetto & 0x80)
 				event.event    = IB_EVENT_CLIENT_REREGISTER;
 			else
 				event.event    = IB_EVENT_LID_CHANGE;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 96a39b5..136c76c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -44,8 +44,8 @@
 #include "user.h"
 
 #define DRV_NAME	"mlx4_ib"
-#define DRV_VERSION	"0.01"
-#define DRV_RELDATE	"May 1, 2006"
+#define DRV_VERSION	"1.0"
+#define DRV_RELDATE	"April 4, 2008"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -99,6 +99,10 @@
 		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
 		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
+	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+	if (dev->dev->caps.max_gso_sz)
+		props->device_cap_flags |= IB_DEVICE_UD_TSO;
 
 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 		0xffffff;
@@ -567,6 +571,7 @@
 		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
 		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
 		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
@@ -605,6 +610,8 @@
 	ibdev->ib_dev.post_send		= mlx4_ib_post_send;
 	ibdev->ib_dev.post_recv		= mlx4_ib_post_recv;
 	ibdev->ib_dev.create_cq		= mlx4_ib_create_cq;
+	ibdev->ib_dev.modify_cq		= mlx4_ib_modify_cq;
+	ibdev->ib_dev.resize_cq		= mlx4_ib_resize_cq;
 	ibdev->ib_dev.destroy_cq	= mlx4_ib_destroy_cq;
 	ibdev->ib_dev.poll_cq		= mlx4_ib_poll_cq;
 	ibdev->ib_dev.req_notify_cq	= mlx4_ib_arm_cq;
@@ -675,18 +682,20 @@
 }
 
 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
-			  enum mlx4_dev_event event, int subtype,
-			  int port)
+			  enum mlx4_dev_event event, int port)
 {
 	struct ib_event ibev;
 
 	switch (event) {
-	case MLX4_EVENT_TYPE_PORT_CHANGE:
-		ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ?
-			IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+	case MLX4_DEV_EVENT_PORT_UP:
+		ibev.event = IB_EVENT_PORT_ACTIVE;
 		break;
 
-	case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR:
+	case MLX4_DEV_EVENT_PORT_DOWN:
+		ibev.event = IB_EVENT_PORT_ERR;
+		break;
+
+	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
 		ibev.event = IB_EVENT_DEVICE_FATAL;
 		break;
 
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 3726e45..9e63732 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -78,13 +78,21 @@
 	struct mlx4_mtt		mtt;
 };
 
+struct mlx4_ib_cq_resize {
+	struct mlx4_ib_cq_buf	buf;
+	int			cqe;
+};
+
 struct mlx4_ib_cq {
 	struct ib_cq		ibcq;
 	struct mlx4_cq		mcq;
 	struct mlx4_ib_cq_buf	buf;
+	struct mlx4_ib_cq_resize *resize_buf;
 	struct mlx4_ib_db	db;
 	spinlock_t		lock;
+	struct mutex		resize_mutex;
 	struct ib_umem	       *umem;
+	struct ib_umem	       *resize_umem;
 };
 
 struct mlx4_ib_mr {
@@ -110,6 +118,10 @@
 	unsigned		tail;
 };
 
+enum mlx4_ib_qp_flags {
+	MLX4_IB_QP_LSO		= 1 << 0
+};
+
 struct mlx4_ib_qp {
 	struct ib_qp		ibqp;
 	struct mlx4_qp		mqp;
@@ -129,6 +141,7 @@
 	struct mlx4_mtt		mtt;
 	int			buf_size;
 	struct mutex		mutex;
+	u32			flags;
 	u8			port;
 	u8			alt_port;
 	u8			atomic_rd_en;
@@ -249,6 +262,8 @@
 				  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
 
+int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
 				struct ib_ucontext *context,
 				struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 958e205..b75efae7 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -71,6 +71,7 @@
 
 static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_SEND]			= __constant_cpu_to_be32(MLX4_OPCODE_SEND),
+	[IB_WR_LSO]			= __constant_cpu_to_be32(MLX4_OPCODE_LSO),
 	[IB_WR_SEND_WITH_IMM]		= __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
 	[IB_WR_RDMA_WRITE]		= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
 	[IB_WR_RDMA_WRITE_WITH_IMM]	= __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
@@ -122,7 +123,7 @@
  */
 static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
 {
-	u32 *wqe;
+	__be32 *wqe;
 	int i;
 	int s;
 	int ind;
@@ -143,7 +144,7 @@
 		buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
 		for (i = 64; i < s; i += 64) {
 			wqe = buf + i;
-			*wqe = 0xffffffff;
+			*wqe = cpu_to_be32(0xffffffff);
 		}
 	}
 }
@@ -242,7 +243,7 @@
 	}
 }
 
-static int send_wqe_overhead(enum ib_qp_type type)
+static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
 {
 	/*
 	 * UD WQEs must have a datagram segment.
@@ -253,7 +254,8 @@
 	switch (type) {
 	case IB_QPT_UD:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
-			sizeof (struct mlx4_wqe_datagram_seg);
+			sizeof (struct mlx4_wqe_datagram_seg) +
+			((flags & MLX4_IB_QP_LSO) ? 64 : 0);
 	case IB_QPT_UC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
@@ -315,7 +317,7 @@
 	/* Sanity check SQ size before proceeding */
 	if (cap->max_send_wr	 > dev->dev->caps.max_wqes  ||
 	    cap->max_send_sge	 > dev->dev->caps.max_sq_sg ||
-	    cap->max_inline_data + send_wqe_overhead(type) +
+	    cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
 	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
 		return -EINVAL;
 
@@ -329,7 +331,7 @@
 
 	s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
 		cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
-		send_wqe_overhead(type);
+		send_wqe_overhead(type, qp->flags);
 
 	/*
 	 * Hermon supports shrinking WQEs, such that a single work
@@ -394,7 +396,8 @@
 	}
 
 	qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) -
-			 send_wqe_overhead(type)) / sizeof (struct mlx4_wqe_data_seg);
+			 send_wqe_overhead(type, qp->flags)) /
+		sizeof (struct mlx4_wqe_data_seg);
 
 	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
 		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
@@ -503,6 +506,9 @@
 	} else {
 		qp->sq_no_prefetch = 0;
 
+		if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
+			qp->flags |= MLX4_IB_QP_LSO;
+
 		err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
 		if (err)
 			goto err;
@@ -673,6 +679,13 @@
 	struct mlx4_ib_qp *qp;
 	int err;
 
+	/* We only support LSO, and only for kernel UD QPs. */
+	if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO)
+		return ERR_PTR(-EINVAL);
+	if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO &&
+	    (pd->uobject || init_attr->qp_type != IB_QPT_UD))
+		return ERR_PTR(-EINVAL);
+
 	switch (init_attr->qp_type) {
 	case IB_QPT_RC:
 	case IB_QPT_UC:
@@ -876,10 +889,15 @@
 		}
 	}
 
-	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
-	    ibqp->qp_type == IB_QPT_UD)
+	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
 		context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
-	else if (attr_mask & IB_QP_PATH_MTU) {
+	else if (ibqp->qp_type == IB_QPT_UD) {
+		if (qp->flags & MLX4_IB_QP_LSO)
+			context->mtu_msgmax = (IB_MTU_4096 << 5) |
+					      ilog2(dev->dev->caps.max_gso_sz);
+		else
+			context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+	} else if (attr_mask & IB_QP_PATH_MTU) {
 		if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
 			printk(KERN_ERR "path MTU (%u) is invalid\n",
 			       attr->path_mtu);
@@ -1182,7 +1200,7 @@
 }
 
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
-			    void *wqe)
+			    void *wqe, unsigned *mlx_seg_len)
 {
 	struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
 	struct mlx4_wqe_mlx_seg *mlx = wqe;
@@ -1231,7 +1249,7 @@
 	case IB_WR_SEND_WITH_IMM:
 		sqp->ud_header.bth.opcode	 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
 		sqp->ud_header.immediate_present = 1;
-		sqp->ud_header.immediate_data    = wr->imm_data;
+		sqp->ud_header.immediate_data    = wr->ex.imm_data;
 		break;
 	default:
 		return -EINVAL;
@@ -1303,7 +1321,9 @@
 		i = 2;
 	}
 
-	return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+	*mlx_seg_len =
+		ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+	return 0;
 }
 
 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
@@ -1396,6 +1416,34 @@
 	dseg->addr       = cpu_to_be64(sg->addr);
 }
 
+static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
+			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
+{
+	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
+
+	/*
+	 * This is a temporary limitation and will be removed in
+	 * a forthcoming FW release:
+	 */
+	if (unlikely(halign > 64))
+		return -EINVAL;
+
+	if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
+		     wr->num_sge > qp->sq.max_gs - (halign >> 4)))
+		return -EINVAL;
+
+	memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
+
+	/* make sure LSO header is written before overwriting stamping */
+	wmb();
+
+	wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
+					wr->wr.ud.hlen);
+
+	*lso_seg_len = halign;
+	return 0;
+}
+
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		      struct ib_send_wr **bad_wr)
 {
@@ -1409,6 +1457,7 @@
 	unsigned ind;
 	int uninitialized_var(stamp);
 	int uninitialized_var(size);
+	unsigned seglen;
 	int i;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1436,11 +1485,14 @@
 			 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
 			(wr->send_flags & IB_SEND_SOLICITED ?
 			 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
+			((wr->send_flags & IB_SEND_IP_CSUM) ?
+			 cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+				     MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
 			qp->sq_signal_bits;
 
 		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
 		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-			ctrl->imm = wr->imm_data;
+			ctrl->imm = wr->ex.imm_data;
 		else
 			ctrl->imm = 0;
 
@@ -1484,19 +1536,27 @@
 			set_datagram_seg(wqe, wr);
 			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+
+			if (wr->opcode == IB_WR_LSO) {
+				err = build_lso_seg(wqe, wr, qp, &seglen);
+				if (unlikely(err)) {
+					*bad_wr = wr;
+					goto out;
+				}
+				wqe  += seglen;
+				size += seglen / 16;
+			}
 			break;
 
 		case IB_QPT_SMI:
 		case IB_QPT_GSI:
-			err = build_mlx_header(to_msqp(qp), wr, ctrl);
-			if (err < 0) {
+			err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
+			if (unlikely(err)) {
 				*bad_wr = wr;
 				goto out;
 			}
-			wqe  += err;
-			size += err / 16;
-
-			err = 0;
+			wqe  += seglen;
+			size += seglen / 16;
 			break;
 
 		default:
@@ -1725,7 +1785,9 @@
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	struct mlx4_qp_context context;
 	int mlx4_state;
-	int err;
+	int err = 0;
+
+	mutex_lock(&qp->mutex);
 
 	if (qp->state == IB_QPS_RESET) {
 		qp_attr->qp_state = IB_QPS_RESET;
@@ -1733,12 +1795,15 @@
 	}
 
 	err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
-	if (err)
-		return -EINVAL;
+	if (err) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	mlx4_state = be32_to_cpu(context.flags) >> 28;
 
-	qp_attr->qp_state	     = to_ib_qp_state(mlx4_state);
+	qp->state		     = to_ib_qp_state(mlx4_state);
+	qp_attr->qp_state	     = qp->state;
 	qp_attr->path_mtu	     = context.mtu_msgmax >> 5;
 	qp_attr->path_mig_state	     =
 		to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
@@ -1797,6 +1862,8 @@
 
 	qp_init_attr->cap	     = qp_attr->cap;
 
-	return 0;
+out:
+	mutex_unlock(&qp->mutex);
+	return err;
 }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 09a30dd..54d230e 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -219,7 +219,7 @@
 	__raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT)                |
 					       (1 << HCA_E_BIT)                 |
 					       (op_modifier << HCR_OPMOD_SHIFT) |
-					        op),                      ptr + offs[6]);
+						op),			  ptr + offs[6]);
 	wmb();
 	__raw_writel((__force u32) 0,                                     ptr + offs[7]);
 	wmb();
@@ -1339,6 +1339,10 @@
 	/* Check port for UD address vector: */
 	*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
 
+	/* Enable IPoIB checksumming if we can: */
+	if (dev->device_cap_flags & IB_DEVICE_UD_IP_CSUM)
+		*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(7 << 3);
+
 	/* We leave wqe_quota, responder_exu, etc as 0 (default) */
 
 	/* QPC/EEC/CQC/EQC/RDB attributes */
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 2f976f2..8928ca4 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -103,6 +103,7 @@
 	DEV_LIM_FLAG_RAW_IPV6           = 1 << 4,
 	DEV_LIM_FLAG_RAW_ETHER          = 1 << 5,
 	DEV_LIM_FLAG_SRQ                = 1 << 6,
+	DEV_LIM_FLAG_IPOIB_CSUM		= 1 << 7,
 	DEV_LIM_FLAG_BAD_PKEY_CNTR      = 1 << 8,
 	DEV_LIM_FLAG_BAD_QKEY_CNTR      = 1 << 9,
 	DEV_LIM_FLAG_MW                 = 1 << 16,
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 1e1e336..20401d2 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -119,7 +119,8 @@
 	__be32 my_qpn;
 	__be32 my_ee;
 	__be32 rqpn;
-	__be16 sl_g_mlpath;
+	u8     sl_ipok;
+	u8     g_mlpath;
 	__be16 rlid;
 	__be32 imm_etype_pkey_eec;
 	__be32 byte_cnt;
@@ -493,6 +494,7 @@
 	int is_send;
 	int free_cqe = 1;
 	int err = 0;
+	u16 checksum;
 
 	cqe = next_cqe_sw(cq);
 	if (!cqe)
@@ -635,12 +637,14 @@
 			break;
 		}
 		entry->slid 	   = be16_to_cpu(cqe->rlid);
-		entry->sl   	   = be16_to_cpu(cqe->sl_g_mlpath) >> 12;
+		entry->sl   	   = cqe->sl_ipok >> 4;
 		entry->src_qp 	   = be32_to_cpu(cqe->rqpn) & 0xffffff;
-		entry->dlid_path_bits = be16_to_cpu(cqe->sl_g_mlpath) & 0x7f;
+		entry->dlid_path_bits = cqe->g_mlpath & 0x7f;
 		entry->pkey_index  = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16;
-		entry->wc_flags   |= be16_to_cpu(cqe->sl_g_mlpath) & 0x80 ?
-					IB_WC_GRH : 0;
+		entry->wc_flags   |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0;
+		checksum = (be32_to_cpu(cqe->rqpn) >> 24) |
+				((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00);
+		entry->csum_ok = (cqe->sl_ipok & 1 && checksum == 0xffff);
 	}
 
 	entry->status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 7bbdd1f..0e842e0 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -54,8 +54,8 @@
 
 #define DRV_NAME	"ib_mthca"
 #define PFX		DRV_NAME ": "
-#define DRV_VERSION	"0.08"
-#define DRV_RELDATE	"February 14, 2006"
+#define DRV_VERSION	"1.0"
+#define DRV_RELDATE	"April 4, 2008"
 
 enum {
 	MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
@@ -390,11 +390,11 @@
 	do {                                                          \
 		void *__p = (char *) (source) + (offset);             \
 		switch (sizeof (dest)) {                              \
-			case 1: (dest) = *(u8 *) __p;       break;    \
-			case 2: (dest) = be16_to_cpup(__p); break;    \
-			case 4: (dest) = be32_to_cpup(__p); break;    \
-			case 8: (dest) = be64_to_cpup(__p); break;    \
-			default: __buggy_use_of_MTHCA_GET();          \
+		case 1: (dest) = *(u8 *) __p;       break;	      \
+		case 2: (dest) = be16_to_cpup(__p); break;	      \
+		case 4: (dest) = be32_to_cpup(__p); break;	      \
+		case 8: (dest) = be64_to_cpup(__p); break;	      \
+		default: __buggy_use_of_MTHCA_GET();		      \
 		}                                                     \
 	} while (0)
 
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index b60eb5d..8bde7f9 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -232,9 +232,9 @@
 	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
 }
 
-static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq)
+static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq)
 {
-	struct mthca_eqe* eqe;
+	struct mthca_eqe *eqe;
 	eqe = get_eqe(eq, eq->cons_index);
 	return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index acfa41d..8b7e83e 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -125,7 +125,7 @@
 			event.device           = ibdev;
 			event.element.port_num = port_num;
 
-			if(pinfo->clientrereg_resv_subnetto & 0x80)
+			if (pinfo->clientrereg_resv_subnetto & 0x80)
 				event.event    = IB_EVENT_CLIENT_REREGISTER;
 			else
 				event.event    = IB_EVENT_LID_CHANGE;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index cd3d8ad..9ebadd6 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -267,11 +267,16 @@
 	if (dev_lim->flags & DEV_LIM_FLAG_SRQ)
 		mdev->mthca_flags |= MTHCA_FLAG_SRQ;
 
+	if (mthca_is_memfree(mdev))
+		if (dev_lim->flags & DEV_LIM_FLAG_IPOIB_CSUM)
+			mdev->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+
 	return 0;
 }
 
 static int mthca_init_tavor(struct mthca_dev *mdev)
 {
+	s64 size;
 	u8 status;
 	int err;
 	struct mthca_dev_lim        dev_lim;
@@ -324,9 +329,11 @@
 	if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
 		profile.num_srq = dev_lim.max_srqs;
 
-	err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
-	if (err < 0)
+	size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
+	if (size < 0) {
+		err = size;
 		goto err_disable;
+	}
 
 	err = mthca_INIT_HCA(mdev, &init_hca, &status);
 	if (err) {
@@ -605,7 +612,7 @@
 	struct mthca_dev_lim        dev_lim;
 	struct mthca_profile        profile;
 	struct mthca_init_hca_param init_hca;
-	u64 icm_size;
+	s64 icm_size;
 	u8 status;
 	int err;
 
@@ -653,7 +660,7 @@
 		profile.num_srq = dev_lim.max_srqs;
 
 	icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
-	if ((int) icm_size < 0) {
+	if (icm_size < 0) {
 		err = icm_size;
 		goto err_stop_fw;
 	}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 252db08..b224079 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -359,12 +359,14 @@
 					      int use_lowmem, int use_coherent)
 {
 	struct mthca_icm_table *table;
+	int obj_per_chunk;
 	int num_icm;
 	unsigned chunk_size;
 	int i;
 	u8 status;
 
-	num_icm = (obj_size * nobj + MTHCA_TABLE_CHUNK_SIZE - 1) / MTHCA_TABLE_CHUNK_SIZE;
+	obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
+	num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
 
 	table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
 	if (!table)
@@ -412,7 +414,7 @@
 		if (table->icm[i]) {
 			mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
 					MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
-				        &status);
+					&status);
 			mthca_free_icm(dev, table->icm[i], table->coherent);
 		}
 
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 26bf86d..605a8d5 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -63,7 +63,7 @@
 	MTHCA_NUM_PDS = 1 << 15
 };
 
-u64 mthca_make_profile(struct mthca_dev *dev,
+s64 mthca_make_profile(struct mthca_dev *dev,
 		       struct mthca_profile *request,
 		       struct mthca_dev_lim *dev_lim,
 		       struct mthca_init_hca_param *init_hca)
@@ -77,7 +77,7 @@
 	};
 
 	u64 mem_base, mem_avail;
-	u64 total_size = 0;
+	s64 total_size = 0;
 	struct mthca_resource *profile;
 	struct mthca_resource tmp;
 	int i, j;
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h
index 9464180..e76cb62 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.h
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -53,7 +53,7 @@
 	int fmr_reserved_mtts;
 };
 
-u64 mthca_make_profile(struct mthca_dev *mdev,
+s64 mthca_make_profile(struct mthca_dev *mdev,
 		       struct mthca_profile *request,
 		       struct mthca_dev_lim *dev_lim,
 		       struct mthca_init_hca_param *init_hca);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 9e491df..81b257e 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -60,7 +60,7 @@
 	struct ib_smp *in_mad  = NULL;
 	struct ib_smp *out_mad = NULL;
 	int err = -ENOMEM;
-	struct mthca_dev* mdev = to_mdev(ibdev);
+	struct mthca_dev *mdev = to_mdev(ibdev);
 
 	u8 status;
 
@@ -540,6 +540,9 @@
 	struct mthca_qp *qp;
 	int err;
 
+	if (init_attr->create_flags)
+		return ERR_PTR(-EINVAL);
+
 	switch (init_attr->qp_type) {
 	case IB_QPT_RC:
 	case IB_QPT_UC:
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index db5595b..09dc361 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -437,29 +437,34 @@
 	int mthca_state;
 	u8 status;
 
+	mutex_lock(&qp->mutex);
+
 	if (qp->state == IB_QPS_RESET) {
 		qp_attr->qp_state = IB_QPS_RESET;
 		goto done;
 	}
 
 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
+	if (IS_ERR(mailbox)) {
+		err = PTR_ERR(mailbox);
+		goto out;
+	}
 
 	err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
 	if (err)
-		goto out;
+		goto out_mailbox;
 	if (status) {
 		mthca_warn(dev, "QUERY_QP returned status %02x\n", status);
 		err = -EINVAL;
-		goto out;
+		goto out_mailbox;
 	}
 
 	qp_param    = mailbox->buf;
 	context     = &qp_param->context;
 	mthca_state = be32_to_cpu(context->flags) >> 28;
 
-	qp_attr->qp_state 	     = to_ib_qp_state(mthca_state);
+	qp->state		     = to_ib_qp_state(mthca_state);
+	qp_attr->qp_state	     = qp->state;
 	qp_attr->path_mtu 	     = context->mtu_msgmax >> 5;
 	qp_attr->path_mig_state      =
 		to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -506,8 +511,11 @@
 
 	qp_init_attr->cap	     = qp_attr->cap;
 
-out:
+out_mailbox:
 	mthca_free_mailbox(dev, mailbox);
+
+out:
+	mutex_unlock(&qp->mutex);
 	return err;
 }
 
@@ -1532,7 +1540,7 @@
 	case IB_WR_SEND_WITH_IMM:
 		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
 		sqp->ud_header.immediate_present = 1;
-		sqp->ud_header.immediate_data = wr->imm_data;
+		sqp->ud_header.immediate_data = wr->ex.imm_data;
 		break;
 	default:
 		return -EINVAL;
@@ -1679,7 +1687,7 @@
 			cpu_to_be32(1);
 		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
 		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+			((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
 		wqe += sizeof (struct mthca_next_seg);
 		size = sizeof (struct mthca_next_seg) / 16;
@@ -2015,10 +2023,12 @@
 			 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
 			((wr->send_flags & IB_SEND_SOLICITED) ?
 			 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
+			((wr->send_flags & IB_SEND_IP_CSUM) ?
+			 cpu_to_be32(MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM) : 0) |
 			cpu_to_be32(1);
 		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
 		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+			((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
 		wqe += sizeof (struct mthca_next_seg);
 		size = sizeof (struct mthca_next_seg) / 16;
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
index f6a66fe..b3551a8 100644
--- a/drivers/infiniband/hw/mthca/mthca_wqe.h
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -38,14 +38,16 @@
 #include <linux/types.h>
 
 enum {
-	MTHCA_NEXT_DBD       = 1 << 7,
-	MTHCA_NEXT_FENCE     = 1 << 6,
-	MTHCA_NEXT_CQ_UPDATE = 1 << 3,
-	MTHCA_NEXT_EVENT_GEN = 1 << 2,
-	MTHCA_NEXT_SOLICIT   = 1 << 1,
+	MTHCA_NEXT_DBD		= 1 << 7,
+	MTHCA_NEXT_FENCE	= 1 << 6,
+	MTHCA_NEXT_CQ_UPDATE	= 1 << 3,
+	MTHCA_NEXT_EVENT_GEN	= 1 << 2,
+	MTHCA_NEXT_SOLICIT	= 1 << 1,
+	MTHCA_NEXT_IP_CSUM	= 1 << 4,
+	MTHCA_NEXT_TCP_UDP_CSUM = 1 << 5,
 
-	MTHCA_MLX_VL15       = 1 << 17,
-	MTHCA_MLX_SLR        = 1 << 16
+	MTHCA_MLX_VL15		= 1 << 17,
+	MTHCA_MLX_SLR		= 1 << 16
 };
 
 enum {
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index b2112f5..b00b0e3 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -65,7 +65,6 @@
 MODULE_VERSION(DRV_VERSION);
 
 int max_mtu = 9000;
-int nics_per_function = 1;
 int interrupt_mod_interval = 0;
 
 
@@ -93,15 +92,9 @@
 MODULE_PARM_DESC(debug_level, "Enable debug output level");
 
 LIST_HEAD(nes_adapter_list);
-LIST_HEAD(nes_dev_list);
+static LIST_HEAD(nes_dev_list);
 
 atomic_t qps_destroyed;
-atomic_t cqp_reqs_allocated;
-atomic_t cqp_reqs_freed;
-atomic_t cqp_reqs_dynallocated;
-atomic_t cqp_reqs_dynfreed;
-atomic_t cqp_reqs_queued;
-atomic_t cqp_reqs_redriven;
 
 static void nes_print_macaddr(struct net_device *netdev);
 static irqreturn_t nes_interrupt(int, void *);
@@ -310,7 +303,7 @@
 
 	if (atomic_read(&nesqp->refcount) == 0) {
 		printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n",
-				__FUNCTION__, ibqp->qp_num, nesqp->last_aeq);
+				__func__, ibqp->qp_num, nesqp->last_aeq);
 		BUG();
 	}
 
@@ -751,13 +744,13 @@
 
 	list_del(&nesdev->list);
 	nes_destroy_cqp(nesdev);
+
+	free_irq(pcidev->irq, nesdev);
 	tasklet_kill(&nesdev->dpc_tasklet);
 
 	/* Deallocate the Adapter Structure */
 	nes_destroy_adapter(nesdev->nesadapter);
 
-	free_irq(pcidev->irq, nesdev);
-
 	if (nesdev->msi_enabled) {
 		pci_disable_msi(pcidev);
 	}
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index a48b288..1626124 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -143,12 +143,12 @@
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
 #define nes_debug(level, fmt, args...) \
 	if (level & nes_debug_level) \
-		printk(KERN_ERR PFX "%s[%u]: " fmt, __FUNCTION__, __LINE__, ##args)
+		printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args)
 
 #define assert(expr)                                                \
 if (!(expr)) {                                                       \
 	printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n",  \
-		   #expr, __FILE__, __FUNCTION__, __LINE__);                \
+		   #expr, __FILE__, __func__, __LINE__);                \
 }
 
 #define NES_EVENT_TIMEOUT   1200000
@@ -166,7 +166,6 @@
 #include "nes_cm.h"
 
 extern int max_mtu;
-extern int nics_per_function;
 #define max_frame_len (max_mtu+ETH_HLEN)
 extern int interrupt_mod_interval;
 extern int nes_if_count;
@@ -177,9 +176,6 @@
 extern unsigned int nes_debug_level;
 
 extern struct list_head nes_adapter_list;
-extern struct list_head nes_dev_list;
-
-extern struct nes_cm_core *g_cm_core;
 
 extern atomic_t cm_connects;
 extern atomic_t cm_accepts;
@@ -209,7 +205,6 @@
 extern atomic_t cm_accel_dropped_pkts;
 extern atomic_t cm_resets_recvd;
 
-extern u32 crit_err_count;
 extern u32 int_mod_timer_init;
 extern u32 int_mod_cq_depth_256;
 extern u32 int_mod_cq_depth_128;
@@ -219,14 +214,6 @@
 extern u32 int_mod_cq_depth_4;
 extern u32 int_mod_cq_depth_1;
 
-extern atomic_t cqp_reqs_allocated;
-extern atomic_t cqp_reqs_freed;
-extern atomic_t cqp_reqs_dynallocated;
-extern atomic_t cqp_reqs_dynfreed;
-extern atomic_t cqp_reqs_queued;
-extern atomic_t cqp_reqs_redriven;
-
-
 struct nes_device {
 	struct nes_adapter	   *nesadapter;
 	void __iomem           *regs;
@@ -412,7 +399,7 @@
 	if (resource_num >= max_resources) {
 		resource_num = find_first_zero_bit(resource_array, max_resources);
 		if (resource_num >= max_resources) {
-			printk(KERN_ERR PFX "%s: No available resourcess.\n", __FUNCTION__);
+			printk(KERN_ERR PFX "%s: No available resourcess.\n", __func__);
 			spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
 			return -EMFILE;
 		}
@@ -510,9 +497,6 @@
 /* nes_hw.c */
 struct nes_adapter *nes_init_adapter(struct nes_device *, u8);
 void  nes_nic_init_timer_defaults(struct nes_device *, u8);
-unsigned int nes_reset_adapter_ne020(struct nes_device *, u8 *);
-int nes_init_serdes(struct nes_device *, u8, u8, u8);
-void nes_init_csr_ne020(struct nes_device *, u8, u8);
 void nes_destroy_adapter(struct nes_adapter *);
 int nes_init_cqp(struct nes_device *);
 int nes_init_phy(struct nes_device *);
@@ -520,20 +504,12 @@
 void nes_destroy_nic_qp(struct nes_vnic *);
 int nes_napi_isr(struct nes_device *);
 void nes_dpc(unsigned long);
-void nes_process_ceq(struct nes_device *, struct nes_hw_ceq *);
-void nes_process_aeq(struct nes_device *, struct nes_hw_aeq *);
-void nes_process_mac_intr(struct nes_device *, u32);
-void nes_nic_napi_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
 void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
-void nes_cqp_ce_handler(struct nes_device *, struct nes_hw_cq *);
-void nes_process_iwarp_aeqe(struct nes_device *, struct nes_hw_aeqe *);
 void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
 int nes_destroy_cqp(struct nes_device *);
 int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
 
 /* nes_nic.c */
-void nes_netdev_set_multicast_list(struct net_device *);
-void nes_netdev_exit(struct nes_vnic *);
 struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
 void nes_netdev_destroy(struct net_device *);
 int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
@@ -544,7 +520,6 @@
 void nes_update_arp(unsigned char *, u32, u32, u16, u16);
 void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32);
 void nes_sock_release(struct nes_qp *, unsigned long *);
-struct nes_cm_core *nes_cm_alloc_core(void);
 void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32);
 int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32);
 int nes_cm_disconn(struct nes_qp *);
@@ -556,7 +531,6 @@
 struct nes_ib_device *nes_init_ofa_device(struct net_device *);
 void nes_destroy_ofa_device(struct nes_ib_device *);
 int nes_register_ofa_device(struct nes_ib_device *);
-void nes_unregister_ofa_device(struct nes_ib_device *);
 
 /* nes_util.c */
 int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 0bef878..d073862 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -80,7 +80,30 @@
 static int add_ref_cm_node(struct nes_cm_node *);
 static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
 static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
+static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
+				     void *, u32, void *, u32, u8);
+static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node);
 
+static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *,
+					   struct nes_vnic *,
+					   struct ietf_mpa_frame *,
+					   struct nes_cm_info *);
+static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
+			  struct nes_cm_node *);
+static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
+			  struct nes_cm_node *);
+static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
+			    struct sk_buff *);
+static int mini_cm_dealloc_core(struct nes_cm_core *);
+static int mini_cm_get(struct nes_cm_core *);
+static int mini_cm_set(struct nes_cm_core *, u32, u32);
+static int nes_cm_disconn_true(struct nes_qp *);
+static int nes_cm_post_event(struct nes_cm_event *event);
+static int nes_disconnect(struct nes_qp *nesqp, int abrupt);
+static void nes_disconnect_worker(struct work_struct *work);
+static int send_ack(struct nes_cm_node *cm_node);
+static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
 
 /* External CM API Interface */
 /* instance of function pointers for client API */
@@ -99,7 +122,7 @@
 	mini_cm_set
 };
 
-struct nes_cm_core *g_cm_core;
+static struct nes_cm_core *g_cm_core;
 
 atomic_t cm_connects;
 atomic_t cm_accepts;
@@ -149,7 +172,7 @@
 /**
  * send_mpa_request
  */
-int send_mpa_request(struct nes_cm_node *cm_node)
+static int send_mpa_request(struct nes_cm_node *cm_node)
 {
 	struct sk_buff *skb;
 	int ret;
@@ -243,8 +266,9 @@
  * form_cm_frame - get a free packet and build empty frame Use
  * node info to build.
  */
-struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node,
-		void *options, u32 optionsize, void *data, u32 datasize, u8 flags)
+static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node,
+				     void *options, u32 optionsize, void *data,
+				     u32 datasize, u8 flags)
 {
 	struct tcphdr *tcph;
 	struct iphdr *iph;
@@ -342,7 +366,6 @@
 	if (!core)
 		return;
 	nes_debug(NES_DBG_CM, "---------------------------------------------\n");
-	nes_debug(NES_DBG_CM, "Session ID    : %u \n", atomic_read(&core->session_id));
 
 	nes_debug(NES_DBG_CM, "State         : %u \n",  core->state);
 
@@ -395,7 +418,7 @@
 	}
 
 	if (type == NES_TIMER_TYPE_SEND) {
-		new_send->seq_num = htonl(tcp_hdr(skb)->seq);
+		new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
 		atomic_inc(&new_send->skb->users);
 
 		ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
@@ -420,7 +443,7 @@
 		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
 	}
 	if (type == NES_TIMER_TYPE_RECV) {
-		new_send->seq_num = htonl(tcp_hdr(skb)->seq);
+		new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
 		new_send->timetosend = jiffies;
 		spin_lock_irqsave(&cm_node->recv_list_lock, flags);
 		list_add_tail(&new_send->list, &cm_node->recv_list);
@@ -442,7 +465,7 @@
 /**
  * nes_cm_timer_tick
  */
-void nes_cm_timer_tick(unsigned long pass)
+static void nes_cm_timer_tick(unsigned long pass)
 {
 	unsigned long flags, qplockflags;
 	unsigned long nexttimeout = jiffies + NES_LONG_TIME;
@@ -644,7 +667,7 @@
 /**
  * send_syn
  */
-int send_syn(struct nes_cm_node *cm_node, u32 sendack)
+static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
 {
 	int ret;
 	int flags = SET_SYN;
@@ -710,7 +733,7 @@
 /**
  * send_reset
  */
-int send_reset(struct nes_cm_node *cm_node)
+static int send_reset(struct nes_cm_node *cm_node)
 {
 	int ret;
 	struct sk_buff *skb = get_free_pkt(cm_node);
@@ -732,7 +755,7 @@
 /**
  * send_ack
  */
-int send_ack(struct nes_cm_node *cm_node)
+static int send_ack(struct nes_cm_node *cm_node)
 {
 	int ret;
 	struct sk_buff *skb = get_free_pkt(cm_node);
@@ -752,7 +775,7 @@
 /**
  * send_fin
  */
-int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
+static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
 {
 	int ret;
 
@@ -775,7 +798,7 @@
 /**
  * get_free_pkt
  */
-struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node)
+static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node)
 {
 	struct sk_buff *skb, *new_skb;
 
@@ -820,7 +843,6 @@
 {
 	unsigned long flags;
 	u32 hashkey;
-	struct list_head *list_pos;
 	struct list_head *hte;
 	struct nes_cm_node *cm_node;
 
@@ -835,8 +857,7 @@
 
 	/* walk list and find cm_node associated with this session ID */
 	spin_lock_irqsave(&cm_core->ht_lock, flags);
-	list_for_each(list_pos, hte) {
-		cm_node = container_of(list_pos, struct nes_cm_node, list);
+	list_for_each_entry(cm_node, hte, list) {
 		/* compare quad, return node handle if a match */
 		nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
 				cm_node->loc_addr, cm_node->loc_port,
@@ -864,13 +885,11 @@
 		nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
 {
 	unsigned long flags;
-	struct list_head *listen_list;
 	struct nes_cm_listener *listen_node;
 
 	/* walk list and find cm_node associated with this session ID */
 	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
-	list_for_each(listen_list, &cm_core->listen_list.list) {
-		listen_node = container_of(listen_list, struct nes_cm_listener, list);
+	list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
 		/* compare node pair, return node handle if a match */
 		if (((listen_node->loc_addr == dst_addr) ||
 				listen_node->loc_addr == 0x00000000) &&
@@ -1014,7 +1033,7 @@
 	fl.nl_u.ip4_u.daddr = htonl(dst_ip);
 	if (ip_route_output_key(&init_net, &rt, &fl)) {
 		printk("%s: ip_route_output_key failed for 0x%08X\n",
-				__FUNCTION__, dst_ip);
+				__func__, dst_ip);
 		return;
 	}
 
@@ -1077,8 +1096,6 @@
 	cm_node->tcp_cntxt.rcv_nxt = 0;
 	/* get a unique session ID , add thread_id to an upcounter to handle race */
 	atomic_inc(&cm_core->node_cnt);
-	atomic_inc(&cm_core->session_id);
-	cm_node->session_id = (u32)(atomic_read(&cm_core->session_id) + current->tgid);
 	cm_node->conn_type = cm_info->conn_type;
 	cm_node->apbvt_set = 0;
 	cm_node->accept_pend = 0;
@@ -1239,7 +1256,7 @@
 				continue;
 			case OPTION_NUMBER_MSS:
 				nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n",
-						__FUNCTION__,
+						__func__,
 						all_options->as_mss.length, offset, optionsize);
 				got_mss_option = 1;
 				if (all_options->as_mss.length != 4) {
@@ -1272,8 +1289,8 @@
 /**
  * process_packet
  */
-int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
-		struct nes_cm_core *cm_core)
+static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
+			  struct nes_cm_core *cm_core)
 {
 	int optionsize;
 	int datasize;
@@ -1360,7 +1377,7 @@
 	if (optionsize) {
 		u8 *optionsloc = (u8 *)&tcph[1];
 		if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) {
-			nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __FUNCTION__, cm_node);
+			nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __func__, cm_node);
 			send_reset(cm_node);
 			if (cm_node->state != NES_CM_STATE_SYN_SENT)
 			rem_ref_cm_node(cm_core, cm_node);
@@ -1605,9 +1622,7 @@
 	listener->cm_core = cm_core;
 	listener->nesvnic = nesvnic;
 	atomic_inc(&cm_core->node_cnt);
-	atomic_inc(&cm_core->session_id);
 
-	listener->session_id = (u32)(atomic_read(&cm_core->session_id) + current->tgid);
 	listener->conn_type = cm_info->conn_type;
 	listener->backlog = cm_info->backlog;
 	listener->listener_state = NES_CM_LISTENER_ACTIVE_STATE;
@@ -1631,9 +1646,10 @@
 /**
  * mini_cm_connect - make a connection node with params
  */
-struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
-		struct nes_vnic *nesvnic, struct ietf_mpa_frame *mpa_frame,
-		struct nes_cm_info *cm_info)
+static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
+					   struct nes_vnic *nesvnic,
+					   struct ietf_mpa_frame *mpa_frame,
+					   struct nes_cm_info *cm_info)
 {
 	int ret = 0;
 	struct nes_cm_node *cm_node;
@@ -1717,8 +1733,8 @@
  * mini_cm_accept - accept a connection
  * This function is never called
  */
-int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame,
-		struct nes_cm_node *cm_node)
+static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame,
+			  struct nes_cm_node *cm_node)
 {
 	return 0;
 }
@@ -1727,9 +1743,9 @@
 /**
  * mini_cm_reject - reject and teardown a connection
  */
-int mini_cm_reject(struct nes_cm_core *cm_core,
-		struct ietf_mpa_frame *mpa_frame,
-		struct nes_cm_node *cm_node)
+static int mini_cm_reject(struct nes_cm_core *cm_core,
+			  struct ietf_mpa_frame *mpa_frame,
+			  struct nes_cm_node *cm_node)
 {
 	int ret = 0;
 	struct sk_buff *skb;
@@ -1761,7 +1777,7 @@
 /**
  * mini_cm_close
  */
-int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
+static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
 {
 	int ret = 0;
 
@@ -1808,8 +1824,8 @@
  * recv_pkt - recv an ETHERNET packet, and process it through CM
  * node state machine
  */
-int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic,
-		struct sk_buff *skb)
+static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic,
+			    struct sk_buff *skb)
 {
 	struct nes_cm_node *cm_node = NULL;
 	struct nes_cm_listener *listener = NULL;
@@ -1898,7 +1914,7 @@
 /**
  * nes_cm_alloc_core - allocate a top level instance of a cm core
  */
-struct nes_cm_core *nes_cm_alloc_core(void)
+static struct nes_cm_core *nes_cm_alloc_core(void)
 {
 	int i;
 
@@ -1919,7 +1935,6 @@
 	cm_core->state = NES_CM_STATE_INITED;
 	cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
 
-	atomic_set(&cm_core->session_id, 0);
 	atomic_set(&cm_core->events_posted, 0);
 
 	/* init the packet lists */
@@ -1958,7 +1973,7 @@
 /**
  * mini_cm_dealloc_core - deallocate a top level instance of a cm core
  */
-int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
+static int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
 {
 	nes_debug(NES_DBG_CM, "De-Alloc CM Core (%p)\n", cm_core);
 
@@ -1983,7 +1998,7 @@
 /**
  * mini_cm_get
  */
-int mini_cm_get(struct nes_cm_core *cm_core)
+static int mini_cm_get(struct nes_cm_core *cm_core)
 {
 	return cm_core->state;
 }
@@ -1992,7 +2007,7 @@
 /**
  * mini_cm_set
  */
-int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value)
+static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value)
 {
 	int ret = 0;
 
@@ -2109,7 +2124,7 @@
 /**
  * nes_disconnect_worker
  */
-void nes_disconnect_worker(struct work_struct *work)
+static void nes_disconnect_worker(struct work_struct *work)
 {
 	struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work);
 
@@ -2122,7 +2137,7 @@
 /**
  * nes_cm_disconn_true
  */
-int nes_cm_disconn_true(struct nes_qp *nesqp)
+static int nes_cm_disconn_true(struct nes_qp *nesqp)
 {
 	unsigned long flags;
 	int ret = 0;
@@ -2265,7 +2280,7 @@
 /**
  * nes_disconnect
  */
-int nes_disconnect(struct nes_qp *nesqp, int abrupt)
+static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
 {
 	int ret = 0;
 	struct nes_vnic *nesvnic;
@@ -2482,7 +2497,7 @@
 	}
 	if (ret)
 		printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
-				__FUNCTION__, __LINE__, ret);
+				__func__, __LINE__, ret);
 
 	return 0;
 }
@@ -2650,7 +2665,7 @@
 	cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
 	if (!cm_node) {
 		printk("%s[%u] Error returned from listen API call\n",
-				__FUNCTION__, __LINE__);
+				__func__, __LINE__);
 		return -ENOMEM;
 	}
 
@@ -2740,7 +2755,7 @@
  * cm_event_connected
  * handle a connected event, setup QPs and HW
  */
-void cm_event_connected(struct nes_cm_event *event)
+static void cm_event_connected(struct nes_cm_event *event)
 {
 	u64 u64temp;
 	struct nes_qp *nesqp;
@@ -2864,7 +2879,7 @@
 
 	if (ret)
 		printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
-				__FUNCTION__, __LINE__, ret);
+				__func__, __LINE__, ret);
 	nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n",
 			nesqp->hwqp.qp_id, jiffies );
 
@@ -2877,7 +2892,7 @@
 /**
  * cm_event_connect_error
  */
-void cm_event_connect_error(struct nes_cm_event *event)
+static void cm_event_connect_error(struct nes_cm_event *event)
 {
 	struct nes_qp *nesqp;
 	struct iw_cm_id *cm_id;
@@ -2919,7 +2934,7 @@
 	nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 	if (ret)
 		printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
-				__FUNCTION__, __LINE__, ret);
+				__func__, __LINE__, ret);
 	nes_rem_ref(&nesqp->ibqp);
 		cm_id->rem_ref(cm_id);
 
@@ -2930,7 +2945,7 @@
 /**
  * cm_event_reset
  */
-void cm_event_reset(struct nes_cm_event *event)
+static void cm_event_reset(struct nes_cm_event *event)
 {
 	struct nes_qp *nesqp;
 	struct iw_cm_id *cm_id;
@@ -2973,7 +2988,7 @@
 /**
  * cm_event_mpa_req
  */
-void cm_event_mpa_req(struct nes_cm_event *event)
+static void cm_event_mpa_req(struct nes_cm_event *event)
 {
 	struct iw_cm_id   *cm_id;
 	struct iw_cm_event cm_event;
@@ -3007,7 +3022,7 @@
 	ret = cm_id->event_handler(cm_id, &cm_event);
 	if (ret)
 		printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
-				__FUNCTION__, __LINE__, ret);
+				__func__, __LINE__, ret);
 
 	return;
 }
@@ -3019,7 +3034,7 @@
  * nes_cm_post_event
  * post an event to the cm event handler
  */
-int nes_cm_post_event(struct nes_cm_event *event)
+static int nes_cm_post_event(struct nes_cm_event *event)
 {
 	atomic_inc(&event->cm_node->cm_core->events_posted);
 	add_ref_cm_node(event->cm_node);
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index a59f0a7..7717cb2 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -225,7 +225,6 @@
 
 struct nes_cm_listener {
 	struct list_head           list;
-	u64                        session_id;
 	struct nes_cm_core         *cm_core;
 	u8                         loc_mac[ETH_ALEN];
 	nes_addr_t                 loc_addr;
@@ -242,7 +241,6 @@
 
 /* per connection node and node state information */
 struct nes_cm_node {
-	u64                       session_id;
 	u32                       hashkey;
 
 	nes_addr_t                loc_addr, rem_addr;
@@ -327,7 +325,6 @@
 
 struct nes_cm_core {
 	enum nes_cm_node_state  state;
-	atomic_t                session_id;
 
 	atomic_t                listen_node_cnt;
 	struct nes_cm_node      listen_list;
@@ -383,35 +380,10 @@
 	int (*set)(struct nes_cm_core *, u32, u32);
 };
 
-
-int send_mpa_request(struct nes_cm_node *);
-struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
-		void *, u32, void *, u32, u8);
 int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
 		enum nes_timer_type, int, int);
-void nes_cm_timer_tick(unsigned long);
-int send_syn(struct nes_cm_node *, u32);
-int send_reset(struct nes_cm_node *);
-int send_ack(struct nes_cm_node *);
-int send_fin(struct nes_cm_node *, struct sk_buff *);
-struct sk_buff *get_free_pkt(struct nes_cm_node *);
-int process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *);
-
-struct nes_cm_node * mini_cm_connect(struct nes_cm_core *,
-		struct nes_vnic *, struct ietf_mpa_frame *, struct nes_cm_info *);
-int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *);
-int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *);
-int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
-int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *);
-struct nes_cm_core *mini_cm_alloc_core(struct nes_cm_info *);
-int mini_cm_dealloc_core(struct nes_cm_core *);
-int mini_cm_get(struct nes_cm_core *);
-int mini_cm_set(struct nes_cm_core *, u32, u32);
 
 int nes_cm_disconn(struct nes_qp *);
-void nes_disconnect_worker(struct work_struct *);
-int nes_cm_disconn_true(struct nes_qp *);
-int nes_disconnect(struct nes_qp *, int);
 
 int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
 int nes_reject(struct iw_cm_id *, const void *, u8);
@@ -423,11 +395,4 @@
 int nes_cm_start(void);
 int nes_cm_stop(void);
 
-/* CM event handler functions */
-void cm_event_connected(struct nes_cm_event *);
-void cm_event_connect_error(struct nes_cm_event *);
-void cm_event_reset(struct nes_cm_event *);
-void cm_event_mpa_req(struct nes_cm_event *);
-int nes_cm_post_event(struct nes_cm_event *);
-
 #endif			/* NES_CM_H */
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 49e53e4..aa53aab 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -41,7 +41,7 @@
 
 #include "nes.h"
 
-u32 crit_err_count = 0;
+static u32 crit_err_count;
 u32 int_mod_timer_init;
 u32 int_mod_cq_depth_256;
 u32 int_mod_cq_depth_128;
@@ -53,6 +53,17 @@
 
 #include "nes_cm.h"
 
+static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq);
+static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count);
+static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
+			   u8 OneG_Mode);
+static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
+static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq);
+static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq);
+static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
+				   struct nes_hw_aeqe *aeqe);
+static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
+static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
 
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
 static unsigned char *nes_iwarp_state_str[] = {
@@ -370,7 +381,7 @@
 		nesadapter->et_use_adaptive_rx_coalesce = 1;
 		nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
 		nesadapter->et_rx_coalesce_usecs_irq = 0;
-		printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __FUNCTION__);
+		printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __func__);
 	}
 	/* Setup and enable the periodic timer */
 	if (nesadapter->et_rx_coalesce_usecs_irq)
@@ -382,7 +393,7 @@
 	nesadapter->base_pd = 1;
 
 	nesadapter->device_cap_flags =
-			IB_DEVICE_ZERO_STAG | IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW;
+		IB_DEVICE_ZERO_STAG | IB_DEVICE_MEM_WINDOW;
 
 	nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
 			[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -572,7 +583,7 @@
 		if (vendor_id == 0xffff)
 			break;
 	}
-	nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __FUNCTION__,
+	nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __func__,
 		func_index, pci_name(nesdev->pcidev));
 	nesadapter->adapter_fcn_count = func_index;
 
@@ -583,7 +594,7 @@
 /**
  * nes_reset_adapter_ne020
  */
-unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode)
+static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode)
 {
 	u32 port_count;
 	u32 u32temp;
@@ -691,7 +702,8 @@
 /**
  * nes_init_serdes
  */
-int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, u8  OneG_Mode)
+static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
+			   u8 OneG_Mode)
 {
 	int i;
 	u32 u32temp;
@@ -739,7 +751,7 @@
 				& 0x0000000f)) != 0x0000000f) && (i++ < 5000))
 				mdelay(1);
 			if (i >= 5000) {
-				printk("%s: Init: serdes 1 not ready, status=%x\n", __FUNCTION__, u32temp);
+				printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp);
 				/* return 1; */
 			}
 			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7);
@@ -760,7 +772,7 @@
  * nes_init_csr_ne020
  * Initialize registers for ne020 hardware
  */
-void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count)
+static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count)
 {
 	u32 u32temp;
 
@@ -1204,7 +1216,7 @@
 	if (nesadapter->OneG_Mode) {
 		nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
 		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
-			printk(PFX "%s: Programming mdc config for 1G\n", __FUNCTION__);
+			printk(PFX "%s: Programming mdc config for 1G\n", __func__);
 			tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
 			tx_config |= 0x04;
 			nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
@@ -1358,7 +1370,7 @@
 static void nes_rq_wqes_timeout(unsigned long parm)
 {
 	struct nes_vnic *nesvnic = (struct nes_vnic *)parm;
-	printk("%s: Timer fired.\n", __FUNCTION__);
+	printk("%s: Timer fired.\n", __func__);
 	atomic_set(&nesvnic->rx_skb_timer_running, 0);
 	if (atomic_read(&nesvnic->rx_skbs_needed))
 		nes_replenish_nic_rq(nesvnic);
@@ -1909,7 +1921,7 @@
 /**
  * nes_process_ceq
  */
-void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
+static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
 {
 	u64 u64temp;
 	struct nes_hw_cq *cq;
@@ -1949,7 +1961,7 @@
 /**
  * nes_process_aeq
  */
-void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
+static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
 {
 //	u64 u64temp;
 	u32 head;
@@ -2060,7 +2072,7 @@
 /**
  * nes_process_mac_intr
  */
-void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
+static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 {
 	unsigned long flags;
 	u32 pcs_control_status;
@@ -2163,7 +2175,7 @@
 				temp_phy_data = phy_data;
 			} while (1);
 			nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
-				__FUNCTION__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
+				__func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
 
 		} else {
 			phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0;
@@ -2205,7 +2217,7 @@
 
 
 
-void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
+static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 {
 	struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
 
@@ -2428,7 +2440,7 @@
 /**
  * nes_cqp_ce_handler
  */
-void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
+static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
 {
 	u64 u64temp;
 	unsigned long flags;
@@ -2567,7 +2579,8 @@
 /**
  * nes_process_iwarp_aeqe
  */
-void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe)
+static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
+				   struct nes_hw_aeqe *aeqe)
 {
 	u64 context;
 	u64 aeqe_context = 0;
@@ -2819,7 +2832,7 @@
 					le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
 			if (resource_allocated) {
 				printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
-						__FUNCTION__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+						__func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
 			}
 			break;
 		case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index eee77da..34166641 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -802,7 +802,7 @@
 
 	memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
 	printk(PFX "%s: Address length = %d, Address = %02X%02X%02X%02X%02X%02X..\n",
-		   __FUNCTION__, netdev->addr_len,
+		   __func__, netdev->addr_len,
 		   mac_addr->sa_data[0], mac_addr->sa_data[1],
 		   mac_addr->sa_data[2], mac_addr->sa_data[3],
 		   mac_addr->sa_data[4], mac_addr->sa_data[5]);
@@ -832,7 +832,7 @@
 /**
  * nes_netdev_set_multicast_list
  */
-void nes_netdev_set_multicast_list(struct net_device *netdev)
+static void nes_netdev_set_multicast_list(struct net_device *netdev)
 {
 	struct nes_vnic *nesvnic = netdev_priv(netdev);
 	struct nes_device *nesdev = nesvnic->nesdev;
@@ -947,28 +947,6 @@
 	return ret;
 }
 
-
-/**
- * nes_netdev_exit - destroy network device
- */
-void nes_netdev_exit(struct nes_vnic *nesvnic)
-{
-	struct net_device *netdev = nesvnic->netdev;
-	struct nes_ib_device *nesibdev = nesvnic->nesibdev;
-
-	nes_debug(NES_DBG_SHUTDOWN, "\n");
-
-	// destroy the ibdevice if RDMA enabled
-	if ((nesvnic->rdma_enabled)&&(nesvnic->of_device_registered)) {
-		nes_destroy_ofa_device( nesibdev );
-		nesvnic->of_device_registered = 0;
-		nesvnic->nesibdev = NULL;
-	}
-	unregister_netdev(netdev);
-	nes_debug(NES_DBG_SHUTDOWN, "\n");
-}
-
-
 #define NES_ETHTOOL_STAT_COUNT 55
 static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = {
 	"Link Change Interrupts",
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index c4ec6ac..f9db07c 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -566,7 +566,7 @@
 				cqp_request);
 	} else
 		printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n",
-			   __FUNCTION__);
+			   __func__);
 
 	return cqp_request;
 }
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index a651e9d..7c27420 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -49,6 +49,7 @@
 atomic_t qps_created;
 atomic_t sw_qps_destroyed;
 
+static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
 
 /**
  * nes_alloc_mw
@@ -1043,10 +1044,10 @@
 	u8 sq_pbl_entries;
 
 	pbl_entries = nespbl->pbl_size >> 3;
-	nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%p\n",
+	nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%lx\n",
 			nespbl->pbl_size, pbl_entries,
 			(void *)nespbl->pbl_vbase,
-			(void *)nespbl->pbl_pbase);
+			(unsigned long) nespbl->pbl_pbase);
 	pbl = (__le64 *) nespbl->pbl_vbase; /* points to first pbl entry */
 	/* now lets set the sq_vbase as well as rq_vbase addrs we will assign */
 	/* the first pbl to be fro the rq_vbase... */
@@ -1074,9 +1075,9 @@
 	/* nesqp->hwqp.rq_vbase = bus_to_virt(*pbl); */
 	/*nesqp->hwqp.rq_vbase = phys_to_virt(*pbl); */
 
-	nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%p rq_vbase=%p rq_pbase=%p\n",
-			nesqp->hwqp.sq_vbase, (void *)nesqp->hwqp.sq_pbase,
-			nesqp->hwqp.rq_vbase, (void *)nesqp->hwqp.rq_pbase);
+	nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%lx rq_vbase=%p rq_pbase=%lx\n",
+		  nesqp->hwqp.sq_vbase, (unsigned long) nesqp->hwqp.sq_pbase,
+		  nesqp->hwqp.rq_vbase, (unsigned long) nesqp->hwqp.rq_pbase);
 	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
 	if (!nesadapter->free_256pbl) {
 		pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
@@ -1251,6 +1252,9 @@
 	u8 rq_encoded_size;
 	/* int counter; */
 
+	if (init_attr->create_flags)
+		return ERR_PTR(-EINVAL);
+
 	atomic_inc(&qps_created);
 	switch (init_attr->qp_type) {
 		case IB_QPT_RC:
@@ -1908,13 +1912,13 @@
 		nesadapter->free_256pbl++;
 		if (nesadapter->free_256pbl > nesadapter->max_256pbl) {
 			printk(KERN_ERR PFX "%s: free 256B PBLs(%u) has exceeded the max(%u)\n",
-					__FUNCTION__, nesadapter->free_256pbl, nesadapter->max_256pbl);
+					__func__, nesadapter->free_256pbl, nesadapter->max_256pbl);
 		}
 	} else if (nescq->virtual_cq == 2) {
 		nesadapter->free_4kpbl++;
 		if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) {
 			printk(KERN_ERR PFX "%s: free 4K PBLs(%u) has exceeded the max(%u)\n",
-					__FUNCTION__, nesadapter->free_4kpbl, nesadapter->max_4kpbl);
+					__func__, nesadapter->free_4kpbl, nesadapter->max_4kpbl);
 		}
 		opcode |= NES_CQP_CQ_4KB_CHUNK;
 	}
@@ -2653,10 +2657,10 @@
 
 			nespbl->pbl_vbase = (u64 *)pbl;
 			nespbl->user_base = start;
-			nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%p,"
+			nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%lx,"
 					" pbl_vbase=%p user_base=0x%lx\n",
-					nespbl->pbl_size, (void *)nespbl->pbl_pbase,
-					(void*)nespbl->pbl_vbase, nespbl->user_base);
+				  nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
+				  (void *) nespbl->pbl_vbase, nespbl->user_base);
 
 			list_for_each_entry(chunk, &region->chunk_list, list) {
 				for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
@@ -3895,14 +3899,11 @@
 /**
  * nes_unregister_ofa_device
  */
-void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
+static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
 {
 	struct nes_vnic *nesvnic = nesibdev->nesvnic;
 	int i;
 
-	if (nesibdev == NULL)
-		return;
-
 	for (i = 0; i < ARRAY_SIZE(nes_class_attributes); ++i) {
 		class_device_remove_file(&nesibdev->ibdev.class_dev, nes_class_attributes[i]);
 	}
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
index 98ee38e..3090100 100644
--- a/drivers/infiniband/ulp/ipoib/Makefile
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -4,7 +4,8 @@
 						   ipoib_ib.o \
 						   ipoib_multicast.o \
 						   ipoib_verbs.o \
-						   ipoib_vlan.o
+						   ipoib_vlan.o \
+						   ipoib_ethtool.o
 ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM)		+= ipoib_cm.o
 ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG)	+= ipoib_fs.o
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 054fab8..73b2b17 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -87,6 +87,7 @@
 	IPOIB_MCAST_STARTED	  = 8,
 	IPOIB_FLAG_ADMIN_CM	  = 9,
 	IPOIB_FLAG_UMCAST	  = 10,
+	IPOIB_FLAG_CSUM		  = 11,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -241,6 +242,11 @@
 	int			num_frags;
 };
 
+struct ipoib_ethtool_st {
+	u16     coalesce_usecs;
+	u16     max_coalesced_frames;
+};
+
 /*
  * Device private locking: tx_lock protects members used in TX fast
  * path (and we use LLTX so upper layers don't do extra locking).
@@ -318,6 +324,8 @@
 	struct dentry *mcg_dentry;
 	struct dentry *path_dentry;
 #endif
+	int	hca_caps;
+	struct ipoib_ethtool_st ethtool;
 };
 
 struct ipoib_ah {
@@ -458,6 +466,8 @@
 int ipoib_pkey_dev_delay_open(struct net_device *dev);
 void ipoib_drain_cq(struct net_device *dev);
 
+void ipoib_set_ethtool_ops(struct net_device *dev);
+
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 
 #define IPOIB_FLAGS_RC		0x80
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2490b2d..9db7b0b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1007,9 +1007,9 @@
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
-	ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
+	ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
 	if (ret) {
-		ipoib_warn(priv, "pkey 0x%x not in cache: %d\n", priv->pkey, ret);
+		ipoib_warn(priv, "pkey 0x%x not found: %d\n", priv->pkey, ret);
 		return ret;
 	}
 
@@ -1383,6 +1383,10 @@
 		set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 		ipoib_warn(priv, "enabling connected mode "
 			   "will cause multicast packet drops\n");
+
+		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
+		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+
 		ipoib_flush_paths(dev);
 		return count;
 	}
@@ -1391,6 +1395,13 @@
 		clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 		dev->mtu = min(priv->mcast_mtu, dev->mtu);
 		ipoib_flush_paths(dev);
+
+		if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) {
+			dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
+			if (priv->hca_caps & IB_DEVICE_UD_TSO)
+				dev->features |= NETIF_F_TSO;
+		}
+
 		return count;
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
new file mode 100644
index 0000000..9a47428
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+
+#include "ipoib.h"
+
+static void ipoib_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	strncpy(drvinfo->driver, "ipoib", sizeof(drvinfo->driver) - 1);
+}
+
+static int ipoib_get_coalesce(struct net_device *dev,
+			      struct ethtool_coalesce *coal)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
+	coal->tx_coalesce_usecs = priv->ethtool.coalesce_usecs;
+	coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
+	coal->tx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
+
+	return 0;
+}
+
+static int ipoib_set_coalesce(struct net_device *dev,
+			      struct ethtool_coalesce *coal)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int ret;
+
+	/*
+	 * Since IPoIB uses a single CQ for both rx and tx, we assume
+	 * that rx params dictate the configuration.  These values are
+	 * saved in the private data and returned when ipoib_get_coalesce()
+	 * is called.
+	 */
+	if (coal->rx_coalesce_usecs       > 0xffff ||
+	    coal->rx_max_coalesced_frames > 0xffff)
+		return -EINVAL;
+
+	ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
+			   coal->rx_coalesce_usecs);
+	if (ret && ret != -ENOSYS) {
+		ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
+		return ret;
+	}
+
+	coal->tx_coalesce_usecs       = coal->rx_coalesce_usecs;
+	coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames;
+	priv->ethtool.coalesce_usecs       = coal->rx_coalesce_usecs;
+	priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;
+
+	return 0;
+}
+
+static const struct ethtool_ops ipoib_ethtool_ops = {
+	.get_drvinfo		= ipoib_get_drvinfo,
+	.get_tso		= ethtool_op_get_tso,
+	.get_coalesce		= ipoib_get_coalesce,
+	.set_coalesce		= ipoib_set_coalesce,
+};
+
+void ipoib_set_ethtool_ops(struct net_device *dev)
+{
+	SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops);
+}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 08c4396..0205eb7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -39,6 +39,8 @@
 #include <linux/dma-mapping.h>
 
 #include <rdma/ib_cache.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
 
 #include "ipoib.h"
 
@@ -231,6 +233,10 @@
 	skb->dev = dev;
 	/* XXX get correct PACKET_ type here */
 	skb->pkt_type = PACKET_HOST;
+
+	if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
 	netif_receive_skb(skb);
 
 repost:
@@ -245,29 +251,37 @@
 	struct sk_buff *skb = tx_req->skb;
 	u64 *mapping = tx_req->mapping;
 	int i;
+	int off;
 
-	mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
-				       DMA_TO_DEVICE);
-	if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
-		return -EIO;
+	if (skb_headlen(skb)) {
+		mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+					       DMA_TO_DEVICE);
+		if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+			return -EIO;
+
+		off = 1;
+	} else
+		off = 0;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		mapping[i + 1] = ib_dma_map_page(ca, frag->page,
+		mapping[i + off] = ib_dma_map_page(ca, frag->page,
 						 frag->page_offset, frag->size,
 						 DMA_TO_DEVICE);
-		if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
+		if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
 			goto partial_error;
 	}
 	return 0;
 
 partial_error:
-	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
-
 	for (; i > 0; --i) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
-		ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
+		ib_dma_unmap_page(ca, mapping[i - !off], frag->size, DMA_TO_DEVICE);
 	}
+
+	if (off)
+		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
 	return -EIO;
 }
 
@@ -277,12 +291,17 @@
 	struct sk_buff *skb = tx_req->skb;
 	u64 *mapping = tx_req->mapping;
 	int i;
+	int off;
 
-	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+	if (skb_headlen(skb)) {
+		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+		off = 1;
+	} else
+		off = 0;
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
+		ib_dma_unmap_page(ca, mapping[i + off], frag->size,
 				  DMA_TO_DEVICE);
 	}
 }
@@ -388,24 +407,40 @@
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
-			    u64 *mapping, int headlen,
-			    skb_frag_t *frags,
-			    int nr_frags)
+			    struct ipoib_tx_buf *tx_req,
+			    void *head, int hlen)
 {
 	struct ib_send_wr *bad_wr;
-	int i;
+	int i, off;
+	struct sk_buff *skb = tx_req->skb;
+	skb_frag_t *frags = skb_shinfo(skb)->frags;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	u64 *mapping = tx_req->mapping;
 
-	priv->tx_sge[0].addr	     = mapping[0];
-	priv->tx_sge[0].length       = headlen;
+	if (skb_headlen(skb)) {
+		priv->tx_sge[0].addr         = mapping[0];
+		priv->tx_sge[0].length       = skb_headlen(skb);
+		off = 1;
+	} else
+		off = 0;
+
 	for (i = 0; i < nr_frags; ++i) {
-		priv->tx_sge[i + 1].addr = mapping[i + 1];
-		priv->tx_sge[i + 1].length = frags[i].size;
+		priv->tx_sge[i + off].addr = mapping[i + off];
+		priv->tx_sge[i + off].length = frags[i].size;
 	}
-	priv->tx_wr.num_sge	     = nr_frags + 1;
+	priv->tx_wr.num_sge	     = nr_frags + off;
 	priv->tx_wr.wr_id 	     = wr_id;
 	priv->tx_wr.wr.ud.remote_qpn = qpn;
 	priv->tx_wr.wr.ud.ah 	     = address;
 
+	if (head) {
+		priv->tx_wr.wr.ud.mss	 = skb_shinfo(skb)->gso_size;
+		priv->tx_wr.wr.ud.header = head;
+		priv->tx_wr.wr.ud.hlen	 = hlen;
+		priv->tx_wr.opcode	 = IB_WR_LSO;
+	} else
+		priv->tx_wr.opcode	 = IB_WR_SEND;
+
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
 
@@ -414,14 +449,30 @@
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
+	int hlen;
+	void *phead;
 
-	if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
-		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
-			   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
-		++dev->stats.tx_dropped;
-		++dev->stats.tx_errors;
-		ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
-		return;
+	if (skb_is_gso(skb)) {
+		hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+		phead = skb->data;
+		if (unlikely(!skb_pull(skb, hlen))) {
+			ipoib_warn(priv, "linear data too small\n");
+			++dev->stats.tx_dropped;
+			++dev->stats.tx_errors;
+			dev_kfree_skb_any(skb);
+			return;
+		}
+	} else {
+		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
+			ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
+				   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
+			++dev->stats.tx_dropped;
+			++dev->stats.tx_errors;
+			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
+			return;
+		}
+		phead = NULL;
+		hlen  = 0;
 	}
 
 	ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
@@ -442,10 +493,13 @@
 		return;
 	}
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		priv->tx_wr.send_flags |= IB_SEND_IP_CSUM;
+	else
+		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
-			       address->ah, qpn,
-			       tx_req->mapping, skb_headlen(skb),
-			       skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
+			       address->ah, qpn, tx_req, phead, hlen))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++dev->stats.tx_errors;
 		ipoib_dma_unmap_tx(priv->ca, tx_req);
@@ -540,7 +594,7 @@
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	u16 pkey_index = 0;
 
-	if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
+	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 	else
 		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
@@ -781,13 +835,13 @@
 			clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 			ipoib_ib_dev_down(dev, 0);
 			ipoib_ib_dev_stop(dev, 0);
-			ipoib_pkey_dev_delay_open(dev);
-			return;
+			if (ipoib_pkey_dev_delay_open(dev))
+				return;
 		}
-		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 
 		/* restart QP only if P_Key index is changed */
-		if (new_index == priv->pkey_index) {
+		if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
+		    new_index == priv->pkey_index) {
 			ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
 			return;
 		}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 5728204..bd07f02 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -359,8 +359,7 @@
 	spin_lock_irq(&priv->tx_lock);
 	spin_lock(&priv->lock);
 
-	list_splice(&priv->path_list, &remove_list);
-	INIT_LIST_HEAD(&priv->path_list);
+	list_splice_init(&priv->path_list, &remove_list);
 
 	list_for_each_entry(path, &remove_list, list)
 		rb_erase(&path->rb_node, &priv->path_tree);
@@ -952,6 +951,8 @@
 	dev->set_multicast_list	 = ipoib_set_mcast_list;
 	dev->neigh_setup	 = ipoib_neigh_setup_dev;
 
+	ipoib_set_ethtool_ops(dev);
+
 	netif_napi_add(dev, &priv->napi, ipoib_poll, 100);
 
 	dev->watchdog_timeo	 = HZ;
@@ -1105,6 +1106,7 @@
 					 struct ib_device *hca, u8 port)
 {
 	struct ipoib_dev_priv *priv;
+	struct ib_device_attr *device_attr;
 	int result = -ENOMEM;
 
 	priv = ipoib_intf_alloc(format);
@@ -1120,6 +1122,29 @@
 		goto device_init_failed;
 	}
 
+	device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
+	if (!device_attr) {
+		printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
+		       hca->name, sizeof *device_attr);
+		goto device_init_failed;
+	}
+
+	result = ib_query_device(hca, device_attr);
+	if (result) {
+		printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
+		       hca->name, result);
+		kfree(device_attr);
+		goto device_init_failed;
+	}
+	priv->hca_caps = device_attr->device_cap_flags;
+
+	kfree(device_attr);
+
+	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
+		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
+		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+	}
+
 	/*
 	 * Set the full membership bit, so that we join the right
 	 * broadcast group, etc.
@@ -1137,7 +1162,6 @@
 	} else
 		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
-
 	result = ipoib_dev_init(priv->dev, hca, port);
 	if (result < 0) {
 		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
@@ -1155,6 +1179,9 @@
 		goto event_failed;
 	}
 
+	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
+		priv->dev->features |= NETIF_F_TSO;
+
 	result = register_netdev(priv->dev);
 	if (result) {
 		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index a3aeb91..8a20e37 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -192,6 +192,9 @@
 	init_attr.send_cq = priv->cq;
 	init_attr.recv_cq = priv->cq;
 
+	if (priv->hca_caps & IB_DEVICE_UD_TSO)
+		init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
+
 	if (dev->features & NETIF_F_SG)
 		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
 
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 83247f1..08dc81c 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -405,7 +405,7 @@
 	struct iser_dto *send_dto = NULL;
 	unsigned long buf_offset;
 	unsigned long data_seg_len;
-	unsigned int itt;
+	uint32_t itt;
 	int err = 0;
 
 	if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
@@ -416,7 +416,7 @@
 	if (iser_check_xmit(conn, ctask))
 		return -ENOBUFS;
 
-	itt = ntohl(hdr->itt);
+	itt = (__force uint32_t)hdr->itt;
 	data_seg_len = ntoh24(hdr->dlength);
 	buf_offset   = ntohl(hdr->offset);
 
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 993f0a8..d19cfe6 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -473,11 +473,8 @@
 		iser_connect_error(cma_id);
 		break;
 	case RDMA_CM_EVENT_DISCONNECTED:
-		iser_disconnected_handler(cma_id);
-		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		iser_err("Device removal is currently unsupported\n");
-		BUG();
+		iser_disconnected_handler(cma_id);
 		break;
 	default:
 		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index fd4a49f..125765a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -68,7 +68,7 @@
 
 module_param(srp_sg_tablesize, int, 0444);
 MODULE_PARM_DESC(srp_sg_tablesize,
-		 "Max number of gather/scatter entries per I/O (default is 12)");
+		 "Max number of gather/scatter entries per I/O (default is 12, max 255)");
 
 static int topspin_workarounds = 1;
 
@@ -2138,6 +2138,11 @@
 {
 	int ret;
 
+	if (srp_sg_tablesize > 255) {
+		printk(KERN_WARNING PFX "Clamping srp_sg_tablesize to 255\n");
+		srp_sg_tablesize = 255;
+	}
+
 	ib_srp_transport_template =
 		srp_attach_transport(&ib_srp_transport_functions);
 	if (!ib_srp_transport_template)
diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c
index 6b32ec9..aa95287 100644
--- a/drivers/net/mlx4/catas.c
+++ b/drivers/net/mlx4/catas.c
@@ -69,7 +69,7 @@
 	if (readl(priv->catas_err.map)) {
 		dump_err_buf(dev);
 
-		mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
+		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
 
 		if (internal_err_reset) {
 			spin_lock(&catas_lock);
diff --git a/drivers/net/mlx4/cmd.c b/drivers/net/mlx4/cmd.c
index db49051..70dff94 100644
--- a/drivers/net/mlx4/cmd.c
+++ b/drivers/net/mlx4/cmd.c
@@ -106,7 +106,8 @@
 	u16			token;
 };
 
-static int mlx4_status_to_errno(u8 status) {
+static int mlx4_status_to_errno(u8 status)
+{
 	static const int trans_table[] = {
 		[CMD_STAT_INTERNAL_ERR]	  = -EIO,
 		[CMD_STAT_BAD_OP]	  = -EPERM,
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index d4441fe..caa5bcf 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -38,6 +38,7 @@
 #include <linux/hardirq.h>
 
 #include <linux/mlx4/cmd.h>
+#include <linux/mlx4/cq.h>
 
 #include "mlx4.h"
 #include "icm.h"
@@ -47,21 +48,19 @@
 	u16			reserved1[3];
 	__be16			page_offset;
 	__be32			logsize_usrpage;
-	u8			reserved2;
-	u8			cq_period;
-	u8			reserved3;
-	u8			cq_max_count;
-	u8			reserved4[3];
+	__be16			cq_period;
+	__be16			cq_max_count;
+	u8			reserved2[3];
 	u8			comp_eqn;
 	u8			log_page_size;
-	u8			reserved5[2];
+	u8			reserved3[2];
 	u8			mtt_base_addr_h;
 	__be32			mtt_base_addr_l;
 	__be32			last_notified_index;
 	__be32			solicit_producer_index;
 	__be32			consumer_index;
 	__be32			producer_index;
-	u32			reserved6[2];
+	u32			reserved4[2];
 	__be64			db_rec_addr;
 };
 
@@ -121,6 +120,13 @@
 			MLX4_CMD_TIME_CLASS_A);
 }
 
+static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+			 int cq_num, u32 opmod)
+{
+	return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ,
+			MLX4_CMD_TIME_CLASS_A);
+}
+
 static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int cq_num)
 {
@@ -129,6 +135,58 @@
 			    MLX4_CMD_TIME_CLASS_A);
 }
 
+int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
+		   u16 count, u16 period)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_cq_context *cq_context;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	cq_context = mailbox->buf;
+	memset(cq_context, 0, sizeof *cq_context);
+
+	cq_context->cq_max_count = cpu_to_be16(count);
+	cq_context->cq_period    = cpu_to_be16(period);
+
+	err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 1);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_modify);
+
+int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
+		   int entries, struct mlx4_mtt *mtt)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_cq_context *cq_context;
+	u64 mtt_addr;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	cq_context = mailbox->buf;
+	memset(cq_context, 0, sizeof *cq_context);
+
+	cq_context->logsize_usrpage = cpu_to_be32(ilog2(entries) << 24);
+	cq_context->log_page_size   = mtt->page_shift - 12;
+	mtt_addr = mlx4_mtt_addr(dev, mtt);
+	cq_context->mtt_base_addr_h = mtt_addr >> 32;
+	cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+
+	err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 1);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_resize);
+
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq)
 {
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 9c36c20..e141a15 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -202,7 +202,10 @@
 			break;
 
 		case MLX4_EVENT_TYPE_PORT_CHANGE:
-			mlx4_dispatch_event(dev, eqe->type, eqe->subtype,
+			mlx4_dispatch_event(dev,
+					    eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ?
+					    MLX4_DEV_EVENT_PORT_UP :
+					    MLX4_DEV_EVENT_PORT_DOWN,
 					    be32_to_cpu(eqe->event.port_change.port) >> 28);
 			break;
 
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 61dc495..d82f275 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -133,6 +133,7 @@
 #define QUERY_DEV_CAP_MAX_AV_OFFSET		0x27
 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET		0x29
 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET		0x2b
+#define QUERY_DEV_CAP_MAX_GSO_OFFSET		0x2d
 #define QUERY_DEV_CAP_MAX_RDMA_OFFSET		0x2f
 #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET		0x33
 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET		0x35
@@ -215,6 +216,13 @@
 	dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
 	dev_cap->max_responder_per_qp = 1 << (field & 0x3f);
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET);
+	field &= 0x1f;
+	if (!field)
+		dev_cap->max_gso_sz = 0;
+	else
+		dev_cap->max_gso_sz = 1 << field;
+
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
 	dev_cap->max_rdma_global = 1 << (field & 0x3f);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
@@ -377,6 +385,7 @@
 		 dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
 	mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
 		 dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
+	mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz);
 
 	dump_dev_cap_flags(dev, dev_cap->flags);
 
@@ -696,6 +705,10 @@
 	/* Check port for UD address vector: */
 	*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
 
+	/* Enable IPoIB checksumming if we can: */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3);
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index e16dec8..306cb9b 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -96,6 +96,7 @@
 	u8  bmme_flags;
 	u32 reserved_lkey;
 	u64 max_icm_sz;
+	int max_gso_sz;
 };
 
 struct mlx4_adapter {
diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c
index be5d9e9..4a6c4d5 100644
--- a/drivers/net/mlx4/intf.c
+++ b/drivers/net/mlx4/intf.c
@@ -30,8 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/mlx4/driver.h>
-
 #include "mlx4.h"
 
 struct mlx4_device_context {
@@ -113,8 +111,7 @@
 }
 EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
 
-void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,
-			 int subtype, int port)
+void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_device_context *dev_ctx;
@@ -124,8 +121,7 @@
 
 	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
 		if (dev_ctx->intf->event)
-			dev_ctx->intf->event(dev, dev_ctx->context, type,
-					     subtype, port);
+			dev_ctx->intf->event(dev, dev_ctx->context, type, port);
 
 	spin_unlock_irqrestore(&priv->ctx_lock, flags);
 }
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 08bfc13..49a4aca 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -76,7 +76,7 @@
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
 static struct mlx4_profile default_profile = {
-	.num_qp		= 1 << 16,
+	.num_qp		= 1 << 17,
 	.num_srq	= 1 << 16,
 	.rdmarc_per_qp	= 1 << 4,
 	.num_cq		= 1 << 16,
@@ -159,6 +159,7 @@
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags		     = dev_cap->flags;
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
+	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 
 	return 0;
 }
@@ -735,8 +736,7 @@
 	}
 
 	/*
-	 * Check for BARs.  We expect 0: 1MB, 2: 8MB, 4: DDR (may not
-	 * be present)
+	 * Check for BARs.  We expect 0: 1MB
 	 */
 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
 	    pci_resource_len(pdev, 0) != 1 << 20) {
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index a99e772..57f7f1f 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c
@@ -190,10 +190,6 @@
 		}
 		index += dev->caps.num_mgms;
 
-		err = mlx4_READ_MCG(dev, index, mailbox);
-		if (err)
-			goto out;
-
 		memset(mgm, 0, sizeof *mgm);
 		memcpy(mgm->gid, gid, 16);
 	}
@@ -301,12 +297,10 @@
 	mgm->qp[loc]       = mgm->qp[i - 1];
 	mgm->qp[i - 1]     = 0;
 
-	err = mlx4_WRITE_MCG(dev, index, mailbox);
-	if (err)
+	if (i != 1) {
+		err = mlx4_WRITE_MCG(dev, index, mailbox);
 		goto out;
-
-	if (i != 1)
-		goto out;
+	}
 
 	if (prev == -1) {
 		/* Remove entry from MGM */
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 53a1cdd..7333681 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -42,6 +42,7 @@
 #include <linux/timer.h>
 
 #include <linux/mlx4/device.h>
+#include <linux/mlx4/driver.h>
 #include <linux/mlx4/doorbell.h>
 
 #define DRV_NAME	"mlx4_core"
@@ -313,8 +314,7 @@
 int mlx4_restart_one(struct pci_dev *pdev);
 int mlx4_register_device(struct mlx4_dev *dev);
 void mlx4_unregister_device(struct mlx4_dev *dev);
-void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,
-			 int subtype, int port);
+void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port);
 
 struct mlx4_dev_cap;
 struct mlx4_init_hca_param;
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index e879b21..0788319 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -20,6 +20,7 @@
 config DASD
 	tristate "Support for DASD devices"
 	depends on CCW && BLOCK
+	select IOSCHED_DEADLINE
 	help
 	  Enable this option if you want to access DASDs directly utilizing
 	  S/390s channel subsystem commands. This is necessary for running
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index ccf46c9..ac6d4d3 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -980,12 +980,12 @@
 			break;
 		case -ETIMEDOUT:
 			printk(KERN_WARNING"%s(%s): request timed out\n",
-			       __FUNCTION__, cdev->dev.bus_id);
+			       __func__, cdev->dev.bus_id);
 			//FIXME - dasd uses own timeout interface...
 			break;
 		default:
 			printk(KERN_WARNING"%s(%s): unknown error %ld\n",
-			       __FUNCTION__, cdev->dev.bus_id, PTR_ERR(irb));
+			       __func__, cdev->dev.bus_id, PTR_ERR(irb));
 		}
 		return;
 	}
@@ -1956,6 +1956,7 @@
 	block->request_queue->queuedata = block;
 
 	elevator_exit(block->request_queue->elevator);
+	block->request_queue->elevator = NULL;
 	rc = elevator_init(block->request_queue, "deadline");
 	if (rc) {
 		blk_cleanup_queue(block->request_queue);
@@ -2298,9 +2299,8 @@
 	 * in the other openers.
 	 */
 	if (device->block) {
-		struct dasd_block *block = device->block;
-		max_count = block->bdev ? 0 : -1;
-		open_count = (int) atomic_read(&block->open_count);
+		max_count = device->block->bdev ? 0 : -1;
+		open_count = atomic_read(&device->block->open_count);
 		if (open_count > max_count) {
 			if (open_count > 0)
 				printk(KERN_WARNING "Can't offline dasd "
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index b19db20..e6700df 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -1996,6 +1996,36 @@
 }				/* end dasd_3990_erp_compound */
 
 /*
+ *DASD_3990_ERP_HANDLE_SIM
+ *
+ *DESCRIPTION
+ *  inspects the SIM SENSE data and starts an appropriate action
+ *
+ * PARAMETER
+ *   sense	   sense data of the actual error
+ *
+ * RETURN VALUES
+ *   none
+ */
+void
+dasd_3990_erp_handle_sim(struct dasd_device *device, char *sense)
+{
+	/* print message according to log or message to operator mode */
+	if ((sense[24] & DASD_SIM_MSG_TO_OP) || (sense[1] & 0x10)) {
+
+		/* print SIM SRC from RefCode */
+		DEV_MESSAGE(KERN_ERR, device, "SIM - SRC: "
+			    "%02x%02x%02x%02x", sense[22],
+			    sense[23], sense[11], sense[12]);
+	} else if (sense[24] & DASD_SIM_LOG) {
+		/* print SIM SRC Refcode */
+		DEV_MESSAGE(KERN_WARNING, device, "SIM - SRC: "
+			    "%02x%02x%02x%02x", sense[22],
+			    sense[23], sense[11], sense[12]);
+	}
+}
+
+/*
  * DASD_3990_ERP_INSPECT_32
  *
  * DESCRIPTION
@@ -2018,6 +2048,10 @@
 
 	erp->function = dasd_3990_erp_inspect_32;
 
+	/* check for SIM sense data */
+	if ((sense[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE)
+		dasd_3990_erp_handle_sim(device, sense);
+
 	if (sense[25] & DASD_SENSE_BIT_0) {
 
 		/* compound program action codes (byte25 bit 0 == '1') */
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 3a40bee..2d8df0b 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -745,6 +745,19 @@
 	spin_unlock_irqrestore(&lcu->lock, flags);
 }
 
+static void __stop_device_on_lcu(struct dasd_device *device,
+				 struct dasd_device *pos)
+{
+	/* If pos == device then device is already locked! */
+	if (pos == device) {
+		pos->stopped |= DASD_STOPPED_SU;
+		return;
+	}
+	spin_lock(get_ccwdev_lock(pos->cdev));
+	pos->stopped |= DASD_STOPPED_SU;
+	spin_unlock(get_ccwdev_lock(pos->cdev));
+}
+
 /*
  * This function is called in interrupt context, so the
  * cdev lock for device is already locked!
@@ -755,35 +768,15 @@
 	struct alias_pav_group *pavgroup;
 	struct dasd_device *pos;
 
-	list_for_each_entry(pos, &lcu->active_devices, alias_list) {
-		if (pos != device)
-			spin_lock(get_ccwdev_lock(pos->cdev));
-		pos->stopped |= DASD_STOPPED_SU;
-		if (pos != device)
-			spin_unlock(get_ccwdev_lock(pos->cdev));
-	}
-	list_for_each_entry(pos, &lcu->inactive_devices, alias_list) {
-		if (pos != device)
-			spin_lock(get_ccwdev_lock(pos->cdev));
-		pos->stopped |= DASD_STOPPED_SU;
-		if (pos != device)
-			spin_unlock(get_ccwdev_lock(pos->cdev));
-	}
+	list_for_each_entry(pos, &lcu->active_devices, alias_list)
+		__stop_device_on_lcu(device, pos);
+	list_for_each_entry(pos, &lcu->inactive_devices, alias_list)
+		__stop_device_on_lcu(device, pos);
 	list_for_each_entry(pavgroup, &lcu->grouplist, group) {
-		list_for_each_entry(pos, &pavgroup->baselist, alias_list) {
-			if (pos != device)
-				spin_lock(get_ccwdev_lock(pos->cdev));
-			pos->stopped |= DASD_STOPPED_SU;
-			if (pos != device)
-				spin_unlock(get_ccwdev_lock(pos->cdev));
-		}
-		list_for_each_entry(pos, &pavgroup->aliaslist, alias_list) {
-			if (pos != device)
-				spin_lock(get_ccwdev_lock(pos->cdev));
-			pos->stopped |= DASD_STOPPED_SU;
-			if (pos != device)
-				spin_unlock(get_ccwdev_lock(pos->cdev));
-		}
+		list_for_each_entry(pos, &pavgroup->baselist, alias_list)
+			__stop_device_on_lcu(device, pos);
+		list_for_each_entry(pos, &pavgroup->aliaslist, alias_list)
+			__stop_device_on_lcu(device, pos);
 	}
 }
 
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 61f1693..a0edae0 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1415,6 +1415,13 @@
 		return;
 	}
 
+
+	/* service information message SIM */
+	if ((irb->ecw[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE) {
+		dasd_3990_erp_handle_sim(device, irb->ecw);
+		return;
+	}
+
 	/* just report other unsolicited interrupts */
 	DEV_MESSAGE(KERN_DEBUG, device, "%s",
 		    "unsolicited interrupt received");
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index d13ea05..1166115 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -125,7 +125,8 @@
 
 	private = (struct dasd_fba_private *) device->private;
 	if (private == NULL) {
-		private = kzalloc(sizeof(struct dasd_fba_private), GFP_KERNEL);
+		private = kzalloc(sizeof(struct dasd_fba_private),
+				  GFP_KERNEL | GFP_DMA);
 		if (private == NULL) {
 			DEV_MESSAGE(KERN_WARNING, device, "%s",
 				    "memory allocation failed for private "
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 44b2984..6c624bf 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -72,6 +72,11 @@
 #define DASD_SENSE_BIT_2 0x20
 #define DASD_SENSE_BIT_3 0x10
 
+/* BIT DEFINITIONS FOR SIM SENSE */
+#define DASD_SIM_SENSE 0x0F
+#define DASD_SIM_MSG_TO_OP 0x03
+#define DASD_SIM_LOG 0x0C
+
 /*
  * SECTION: MACROs for klogd and s390 debug feature (dbf)
  */
@@ -621,6 +626,7 @@
 
 /* externals in dasd_3990_erp.c */
 struct dasd_ccw_req *dasd_3990_erp_action(struct dasd_ccw_req *);
+void dasd_3990_erp_handle_sim(struct dasd_device *, char *);
 
 /* externals in dasd_eer.c */
 #ifdef CONFIG_DASD_EER
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index e6c94db..04787ea 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -142,57 +142,6 @@
 	return NULL;
 }
 
-/*
- * print appropriate error message for segment_load()/segment_type()
- * return code
- */
-static void
-dcssblk_segment_warn(int rc, char* seg_name)
-{
-	switch (rc) {
-	case -ENOENT:
-		PRINT_WARN("cannot load/query segment %s, does not exist\n",
-			   seg_name);
-		break;
-	case -ENOSYS:
-		PRINT_WARN("cannot load/query segment %s, not running on VM\n",
-			   seg_name);
-		break;
-	case -EIO:
-		PRINT_WARN("cannot load/query segment %s, hardware error\n",
-			   seg_name);
-		break;
-	case -ENOTSUPP:
-		PRINT_WARN("cannot load/query segment %s, is a multi-part "
-			   "segment\n", seg_name);
-		break;
-	case -ENOSPC:
-		PRINT_WARN("cannot load/query segment %s, overlaps with "
-			   "storage\n", seg_name);
-		break;
-	case -EBUSY:
-		PRINT_WARN("cannot load/query segment %s, overlaps with "
-			   "already loaded dcss\n", seg_name);
-		break;
-	case -EPERM:
-		PRINT_WARN("cannot load/query segment %s, already loaded in "
-			   "incompatible mode\n", seg_name);
-		break;
-	case -ENOMEM:
-		PRINT_WARN("cannot load/query segment %s, out of memory\n",
-			   seg_name);
-		break;
-	case -ERANGE:
-		PRINT_WARN("cannot load/query segment %s, exceeds kernel "
-			   "mapping range\n", seg_name);
-		break;
-	default:
-		PRINT_WARN("cannot load/query segment %s, return value %i\n",
-			   seg_name, rc);
-		break;
-	}
-}
-
 static void dcssblk_unregister_callback(struct device *dev)
 {
 	device_unregister(dev);
@@ -423,7 +372,7 @@
 	rc = segment_load(local_buf, SEGMENT_SHARED,
 				&dev_info->start, &dev_info->end);
 	if (rc < 0) {
-		dcssblk_segment_warn(rc, dev_info->segment_name);
+		segment_warning(rc, dev_info->segment_name);
 		goto dealloc_gendisk;
 	}
 	seg_byte_size = (dev_info->end - dev_info->start + 1);
diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c
index 67009bf..1e1f506 100644
--- a/drivers/s390/char/monreader.c
+++ b/drivers/s390/char/monreader.c
@@ -111,56 +111,6 @@
 	ASCEBC(ebcdic_name, 8);
 }
 
-/*
- * print appropriate error message for segment_load()/segment_type()
- * return code
- */
-static void mon_segment_warn(int rc, char* seg_name)
-{
-	switch (rc) {
-	case -ENOENT:
-		P_WARNING("cannot load/query segment %s, does not exist\n",
-			  seg_name);
-		break;
-	case -ENOSYS:
-		P_WARNING("cannot load/query segment %s, not running on VM\n",
-			  seg_name);
-		break;
-	case -EIO:
-		P_WARNING("cannot load/query segment %s, hardware error\n",
-			  seg_name);
-		break;
-	case -ENOTSUPP:
-		P_WARNING("cannot load/query segment %s, is a multi-part "
-			  "segment\n", seg_name);
-		break;
-	case -ENOSPC:
-		P_WARNING("cannot load/query segment %s, overlaps with "
-			  "storage\n", seg_name);
-		break;
-	case -EBUSY:
-		P_WARNING("cannot load/query segment %s, overlaps with "
-			  "already loaded dcss\n", seg_name);
-		break;
-	case -EPERM:
-		P_WARNING("cannot load/query segment %s, already loaded in "
-			  "incompatible mode\n", seg_name);
-		break;
-	case -ENOMEM:
-		P_WARNING("cannot load/query segment %s, out of memory\n",
-			  seg_name);
-		break;
-	case -ERANGE:
-		P_WARNING("cannot load/query segment %s, exceeds kernel "
-			  "mapping range\n", seg_name);
-		break;
-	default:
-		P_WARNING("cannot load/query segment %s, return value %i\n",
-			  seg_name, rc);
-		break;
-	}
-}
-
 static inline unsigned long mon_mca_start(struct mon_msg *monmsg)
 {
 	return *(u32 *) &monmsg->msg.rmmsg;
@@ -585,7 +535,7 @@
 
 	rc = segment_type(mon_dcss_name);
 	if (rc < 0) {
-		mon_segment_warn(rc, mon_dcss_name);
+		segment_warning(rc, mon_dcss_name);
 		goto out_iucv;
 	}
 	if (rc != SEG_TYPE_SC) {
@@ -598,7 +548,7 @@
 	rc = segment_load(mon_dcss_name, SEGMENT_SHARED,
 			  &mon_dcss_start, &mon_dcss_end);
 	if (rc < 0) {
-		mon_segment_warn(rc, mon_dcss_name);
+		segment_warning(rc, mon_dcss_name);
 		rc = -EINVAL;
 		goto out_iucv;
 	}
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index 2e616e3..e3b3d39 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -332,7 +332,7 @@
 		if (sclp_ttybuf == NULL) {
 			while (list_empty(&sclp_tty_pages)) {
 				spin_unlock_irqrestore(&sclp_tty_lock, flags);
-				if (in_atomic())
+				if (in_interrupt())
 					sclp_sync_wait();
 				else
 					wait_event(sclp_tty_waitq,
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index f7b258d..ed50759 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -383,7 +383,7 @@
  */
 static int
 __sclp_vt220_write(const unsigned char *buf, int count, int do_schedule,
-		   int convertlf)
+		   int convertlf, int may_schedule)
 {
 	unsigned long flags;
 	void *page;
@@ -398,9 +398,8 @@
 		/* Create a sclp output buffer if none exists yet */
 		if (sclp_vt220_current_request == NULL) {
 			while (list_empty(&sclp_vt220_empty)) {
-				spin_unlock_irqrestore(&sclp_vt220_lock,
-						       flags);
-				if (in_atomic())
+				spin_unlock_irqrestore(&sclp_vt220_lock, flags);
+				if (in_interrupt() || !may_schedule)
 					sclp_sync_wait();
 				else
 					wait_event(sclp_vt220_waitq,
@@ -450,7 +449,7 @@
 static int
 sclp_vt220_write(struct tty_struct *tty, const unsigned char *buf, int count)
 {
-	return __sclp_vt220_write(buf, count, 1, 0);
+	return __sclp_vt220_write(buf, count, 1, 0, 1);
 }
 
 #define SCLP_VT220_SESSION_ENDED	0x01
@@ -529,7 +528,7 @@
 static void
 sclp_vt220_put_char(struct tty_struct *tty, unsigned char ch)
 {
-	__sclp_vt220_write(&ch, 1, 0, 0);
+	__sclp_vt220_write(&ch, 1, 0, 0, 1);
 }
 
 /*
@@ -746,7 +745,7 @@
 static void
 sclp_vt220_con_write(struct console *con, const char *buf, unsigned int count)
 {
-	__sclp_vt220_write((const unsigned char *) buf, count, 1, 1);
+	__sclp_vt220_write((const unsigned char *) buf, count, 1, 1, 0);
 }
 
 static struct tty_driver *
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index 5b47e9c..874adf3 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -394,7 +394,7 @@
 			return tape_34xx_erp_failed(request, -ENOSPC);
 		default:
 			PRINT_ERR("Invalid op in %s:%i\n",
-				  __FUNCTION__, __LINE__);
+				  __func__, __LINE__);
 			return tape_34xx_erp_failed(request, 0);
 		}
 	}
diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c
index b830a8c..ebe8406 100644
--- a/drivers/s390/char/tape_char.c
+++ b/drivers/s390/char/tape_char.c
@@ -83,9 +83,9 @@
 void
 tapechar_cleanup_device(struct tape_device *device)
 {
-	unregister_tape_dev(device->rt);
+	unregister_tape_dev(&device->cdev->dev, device->rt);
 	device->rt = NULL;
-	unregister_tape_dev(device->nt);
+	unregister_tape_dev(&device->cdev->dev, device->nt);
 	device->nt = NULL;
 }
 
diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c
index aa7f166..6dfdb7c 100644
--- a/drivers/s390/char/tape_class.c
+++ b/drivers/s390/char/tape_class.c
@@ -99,11 +99,10 @@
 }
 EXPORT_SYMBOL(register_tape_dev);
 
-void unregister_tape_dev(struct tape_class_device *tcd)
+void unregister_tape_dev(struct device *device, struct tape_class_device *tcd)
 {
 	if (tcd != NULL && !IS_ERR(tcd)) {
-		sysfs_remove_link(&tcd->class_device->kobj,
-				  tcd->mode_name);
+		sysfs_remove_link(&device->kobj, tcd->mode_name);
 		device_destroy(tape_class, tcd->char_device->dev);
 		cdev_del(tcd->char_device);
 		kfree(tcd);
diff --git a/drivers/s390/char/tape_class.h b/drivers/s390/char/tape_class.h
index e2b5ac9..707b7f4 100644
--- a/drivers/s390/char/tape_class.h
+++ b/drivers/s390/char/tape_class.h
@@ -56,6 +56,6 @@
 	char *			device_name,
 	char *			node_name
 );
-void unregister_tape_dev(struct tape_class_device *tcd);
+void unregister_tape_dev(struct device *device, struct tape_class_device *tcd);
 
 #endif /* __TAPE_CLASS_H__ */
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 7689b50..83ae9a8 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -100,7 +100,8 @@
 	urd->reclen = cdev->id.driver_info;
 	ccw_device_get_id(cdev, &urd->dev_id);
 	mutex_init(&urd->io_mutex);
-	mutex_init(&urd->open_mutex);
+	init_waitqueue_head(&urd->wait);
+	spin_lock_init(&urd->open_lock);
 	atomic_set(&urd->ref_count,  1);
 	urd->cdev = cdev;
 	get_device(&cdev->dev);
@@ -678,17 +679,21 @@
 	if (!urd)
 		return -ENXIO;
 
-	if (file->f_flags & O_NONBLOCK) {
-		if (!mutex_trylock(&urd->open_mutex)) {
+	spin_lock(&urd->open_lock);
+	while (urd->open_flag) {
+		spin_unlock(&urd->open_lock);
+		if (file->f_flags & O_NONBLOCK) {
 			rc = -EBUSY;
 			goto fail_put;
 		}
-	} else {
-		if (mutex_lock_interruptible(&urd->open_mutex)) {
+		if (wait_event_interruptible(urd->wait, urd->open_flag == 0)) {
 			rc = -ERESTARTSYS;
 			goto fail_put;
 		}
+		spin_lock(&urd->open_lock);
 	}
+	urd->open_flag++;
+	spin_unlock(&urd->open_lock);
 
 	TRACE("ur_open\n");
 
@@ -720,7 +725,9 @@
 fail_urfile_free:
 	urfile_free(urf);
 fail_unlock:
-	mutex_unlock(&urd->open_mutex);
+	spin_lock(&urd->open_lock);
+	urd->open_flag--;
+	spin_unlock(&urd->open_lock);
 fail_put:
 	urdev_put(urd);
 	return rc;
@@ -731,7 +738,10 @@
 	struct urfile *urf = file->private_data;
 
 	TRACE("ur_release\n");
-	mutex_unlock(&urf->urd->open_mutex);
+	spin_lock(&urf->urd->open_lock);
+	urf->urd->open_flag--;
+	spin_unlock(&urf->urd->open_lock);
+	wake_up_interruptible(&urf->urd->wait);
 	urdev_put(urf->urd);
 	urfile_free(urf);
 	return 0;
diff --git a/drivers/s390/char/vmur.h b/drivers/s390/char/vmur.h
index fa95964..fa320ad 100644
--- a/drivers/s390/char/vmur.h
+++ b/drivers/s390/char/vmur.h
@@ -62,7 +62,6 @@
 struct urdev {
 	struct ccw_device *cdev;	/* Backpointer to ccw device */
 	struct mutex io_mutex;		/* Serialises device IO */
-	struct mutex open_mutex;	/* Serialises access to device */
 	struct completion *io_done;	/* do_ur_io waits; irq completes */
 	struct device *device;
 	struct cdev *char_device;
@@ -71,6 +70,9 @@
 	int class;			/* VM device class */
 	int io_request_rc;		/* return code from I/O request */
 	atomic_t ref_count;		/* reference counter */
+	wait_queue_head_t wait;		/* wait queue to serialize open */
+	int open_flag;			/* "urdev is open" flag */
+	spinlock_t open_lock;		/* serialize critical sections */
 };
 
 /*
diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c
index 6f40fac..19f8389 100644
--- a/drivers/s390/char/vmwatchdog.c
+++ b/drivers/s390/char/vmwatchdog.c
@@ -96,7 +96,7 @@
 
 	if (ret) {
 		printk(KERN_WARNING "%s: problem setting interval %d, "
-			"cmd %s\n", __FUNCTION__, vmwdt_interval,
+			"cmd %s\n", __func__, vmwdt_interval,
 			vmwdt_cmd);
 	}
 	return ret;
@@ -107,7 +107,7 @@
 	int ret = __diag288(wdt_cancel, 0, "", 0);
 	if (ret) {
 		printk(KERN_WARNING "%s: problem disabling watchdog\n",
-			__FUNCTION__);
+			__func__);
 	}
 	return ret;
 }
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index f523501..bbbd14e 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -224,7 +224,7 @@
 
 	sa = kmalloc(sizeof(*sa), GFP_KERNEL);
 	if (!sa) {
-		ERROR_MSG("kmalloc failed: %s: %i\n",__FUNCTION__, __LINE__);
+		ERROR_MSG("kmalloc failed: %s: %i\n",__func__, __LINE__);
 		return -ENOMEM;
 	}
 	if (memcpy_hsa_kernel(sa, sys_info.sa_base, sys_info.sa_size) < 0) {
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 007aaeb..5de8690 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -217,6 +217,8 @@
 
 	if (chp_get_status(chpid) <= 0)
 		return;
+	/* Wait until previous actions have settled. */
+	css_wait_for_slow_path();
 	for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &chpid);
 }
 
@@ -303,7 +305,8 @@
 		sprintf(dbf_txt, "fla%x", res_data->fla);
 		CIO_TRACE_EVENT( 2, dbf_txt);
 	}
-
+	/* Wait until previous actions have settled. */
+	css_wait_for_slow_path();
 	/*
 	 * I/O resources may have become accessible.
 	 * Scan through all subchannels that may be concerned and
@@ -561,9 +564,12 @@
 	sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id);
 	CIO_TRACE_EVENT(2, dbf_txt);
 
-	if (chp_get_status(chpid) != 0)
+	if (chp_get_status(chpid) != 0) {
+		/* Wait until previous actions have settled. */
+		css_wait_for_slow_path();
 		for_each_subchannel_staged(__chp_add, __chp_add_new_sch,
 					   &chpid);
+	}
 }
 
 static void __s390_subchannel_vary_chpid(struct subchannel *sch,
@@ -650,6 +656,8 @@
  */
 int chsc_chp_vary(struct chp_id chpid, int on)
 {
+	/* Wait until previous actions have settled. */
+	css_wait_for_slow_path();
 	/*
 	 * Redo PathVerification on the devices the chpid connects to
 	 */
@@ -758,7 +766,6 @@
 	if (!secm_area)
 		return -ENOMEM;
 
-	mutex_lock(&css->mutex);
 	if (enable && !css->cm_enabled) {
 		css->cub_addr1 = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
 		css->cub_addr2 = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
@@ -766,7 +773,6 @@
 			free_page((unsigned long)css->cub_addr1);
 			free_page((unsigned long)css->cub_addr2);
 			free_page((unsigned long)secm_area);
-			mutex_unlock(&css->mutex);
 			return -ENOMEM;
 		}
 	}
@@ -787,7 +793,6 @@
 		free_page((unsigned long)css->cub_addr1);
 		free_page((unsigned long)css->cub_addr2);
 	}
-	mutex_unlock(&css->mutex);
 	free_page((unsigned long)secm_area);
 	return ret;
 }
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 60590a1..23ffcc4 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -24,6 +24,7 @@
 #include <asm/ipl.h>
 #include <asm/chpid.h>
 #include <asm/airq.h>
+#include <asm/cpu.h>
 #include "cio.h"
 #include "css.h"
 #include "chsc.h"
@@ -649,13 +650,10 @@
 
 	old_regs = set_irq_regs(regs);
 	irq_enter();
-	asm volatile ("mc 0,0");
-	if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
-		/**
-		 * Make sure that the i/o interrupt did not "overtake"
-		 * the last HZ timer interrupt.
-		 */
-		account_ticks(S390_lowcore.int_clock);
+	s390_idle_check();
+	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+		/* Serve timer interrupts first. */
+		clock_comparator_work();
 	/*
 	 * Get interrupt information from lowcore
 	 */
@@ -672,10 +670,14 @@
 			continue;
 		}
 		sch = (struct subchannel *)(unsigned long)tpi_info->intparm;
-		if (sch)
-			spin_lock(sch->lock);
+		if (!sch) {
+			/* Clear pending interrupt condition. */
+			tsch(tpi_info->schid, irb);
+			continue;
+		}
+		spin_lock(sch->lock);
 		/* Store interrupt response block to lowcore. */
-		if (tsch (tpi_info->schid, irb) == 0 && sch) {
+		if (tsch(tpi_info->schid, irb) == 0) {
 			/* Keep subchannel information word up to date. */
 			memcpy (&sch->schib.scsw, &irb->scsw,
 				sizeof (irb->scsw));
@@ -683,8 +685,7 @@
 			if (sch->driver && sch->driver->irq)
 				sch->driver->irq(sch);
 		}
-		if (sch)
-			spin_unlock(sch->lock);
+		spin_unlock(sch->lock);
 		/*
 		 * Are more interrupts pending?
 		 * If so, the tpi instruction will update the lowcore
@@ -710,8 +711,9 @@
 /*
  * busy wait for the next interrupt on the console
  */
-void
-wait_cons_dev (void)
+void wait_cons_dev(void)
+	__releases(console_subchannel.lock)
+	__acquires(console_subchannel.lock)
 {
 	unsigned long cr6      __attribute__ ((aligned (8)));
 	unsigned long save_cr6 __attribute__ ((aligned (8)));
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 52afa4c..08f2235 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -100,6 +100,7 @@
 
 int cio_create_sch_lock(struct subchannel *);
 void do_adapter_IO(void);
+void do_IRQ(struct pt_regs *);
 
 /* Use with care. */
 #ifdef CONFIG_CCW_CONSOLE
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 3b45bbe..c1afab5 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -533,6 +533,12 @@
 	spin_unlock_irqrestore(&slow_subchannel_lock, flags);
 }
 
+void css_wait_for_slow_path(void)
+{
+	flush_workqueue(ccw_device_notify_work);
+	flush_workqueue(slow_path_wq);
+}
+
 /* Reprobe subchannel if unregistered. */
 static int reprobe_subchannel(struct subchannel_id schid, void *data)
 {
@@ -683,10 +689,14 @@
 		   char *buf)
 {
 	struct channel_subsystem *css = to_css(dev);
+	int ret;
 
 	if (!css)
 		return 0;
-	return sprintf(buf, "%x\n", css->cm_enabled);
+	mutex_lock(&css->mutex);
+	ret = sprintf(buf, "%x\n", css->cm_enabled);
+	mutex_unlock(&css->mutex);
+	return ret;
 }
 
 static ssize_t
@@ -696,6 +706,7 @@
 	struct channel_subsystem *css = to_css(dev);
 	int ret;
 
+	mutex_lock(&css->mutex);
 	switch (buf[0]) {
 	case '0':
 		ret = css->cm_enabled ? chsc_secm(css, 0) : 0;
@@ -706,6 +717,7 @@
 	default:
 		ret = -EINVAL;
 	}
+	mutex_unlock(&css->mutex);
 	return ret < 0 ? ret : count;
 }
 
@@ -752,9 +764,11 @@
 		struct channel_subsystem *css;
 
 		css = channel_subsystems[i];
+		mutex_lock(&css->mutex);
 		if (css->cm_enabled)
 			if (chsc_secm(css, 0))
 				ret = NOTIFY_BAD;
+		mutex_unlock(&css->mutex);
 	}
 
 	return ret;
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index b705545..e191351 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -144,6 +144,7 @@
 int css_sch_is_valid(struct schib *);
 
 extern struct workqueue_struct *slow_path_wq;
+void css_wait_for_slow_path(void);
 
 extern struct attribute_group *subch_attr_groups[];
 #endif
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index fec004f..e0c7adb 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -577,7 +577,6 @@
 static DEVICE_ATTR(cutype, 0444, cutype_show, NULL);
 static DEVICE_ATTR(modalias, 0444, modalias_show, NULL);
 static DEVICE_ATTR(online, 0644, online_show, online_store);
-extern struct device_attribute dev_attr_cmb_enable;
 static DEVICE_ATTR(availability, 0444, available_show, NULL);
 
 static struct attribute * subch_attrs[] = {
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index d40a2ff..cb08092 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -127,4 +127,5 @@
 void retry_set_schib(struct ccw_device *cdev);
 void cmf_retry_copy_block(struct ccw_device *);
 int cmf_reenable(struct ccw_device *);
+extern struct device_attribute dev_attr_cmb_enable;
 #endif
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 49b58eb..a1718a0 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -193,8 +193,15 @@
 			return -EACCES;
 	}
 	ret = cio_start_key (sch, cpa, lpm, key);
-	if (ret == 0)
+	switch (ret) {
+	case 0:
 		cdev->private->intparm = intparm;
+		break;
+	case -EACCES:
+	case -ENODEV:
+		dev_fsm_event(cdev, DEV_EVENT_VERIFY);
+		break;
+	}
 	return ret;
 }
 
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
index ebe0848..4a38993 100644
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -62,7 +62,7 @@
 	stsch (sch->schid, &sch->schib);
 
 	CIO_MSG_EVENT(0, "%s(0.%x.%04x) - path(s) %02x are "
-		      "not operational \n", __FUNCTION__,
+		      "not operational \n", __func__,
 		      sch->schid.ssid, sch->schid.sch_no,
 		      sch->schib.pmcw.pnom);
 
@@ -312,6 +312,7 @@
 {
 	struct subchannel *sch;
 	struct ccw1 *sense_ccw;
+	int rc;
 
 	sch = to_subchannel(cdev->dev.parent);
 
@@ -337,7 +338,10 @@
 	/* Reset internal retry indication. */
 	cdev->private->flags.intretry = 0;
 
-	return cio_start(sch, sense_ccw, 0xff);
+	rc = cio_start(sch, sense_ccw, 0xff);
+	if (rc == -ENODEV || rc == -EACCES)
+		dev_fsm_event(cdev, DEV_EVENT_VERIFY);
+	return rc;
 }
 
 /*
diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c
index 2b5bfb7..c359386 100644
--- a/drivers/s390/cio/qdio.c
+++ b/drivers/s390/cio/qdio.c
@@ -1399,7 +1399,7 @@
 	 * q->dev_st_chg_ind is the indicator, be it shared or not.
 	 * only clear it, if indicator is non-shared
 	 */
-	if (!spare_ind_was_set)
+	if (q->dev_st_chg_ind != &spare_indicator)
 		tiqdio_clear_summary_bit((__u32*)q->dev_st_chg_ind);
 
 	if (q->hydra_gives_outbound_pcis) {
@@ -2217,9 +2217,78 @@
 	return cc;
 }
 
+static int
+qdio_get_ssqd_information(struct subchannel_id *schid,
+			  struct qdio_chsc_ssqd **ssqd_area)
+{
+	int result;
+
+	QDIO_DBF_TEXT0(0, setup, "getssqd");
+	*ssqd_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC);
+	if (!ssqd_area) {
+		QDIO_PRINT_WARN("Could not get memory for chsc on sch x%x.\n",
+				schid->sch_no);
+		return -ENOMEM;
+	}
+
+	(*ssqd_area)->request = (struct chsc_header) {
+		.length = 0x0010,
+		.code	= 0x0024,
+	};
+	(*ssqd_area)->first_sch = schid->sch_no;
+	(*ssqd_area)->last_sch = schid->sch_no;
+	(*ssqd_area)->ssid = schid->ssid;
+	result = chsc(*ssqd_area);
+
+	if (result) {
+		QDIO_PRINT_WARN("CHSC returned cc %i on sch 0.%x.%x.\n",
+				result, schid->ssid, schid->sch_no);
+		goto out;
+	}
+
+	if ((*ssqd_area)->response.code != QDIO_CHSC_RESPONSE_CODE_OK) {
+		QDIO_PRINT_WARN("CHSC response is 0x%x on sch 0.%x.%x.\n",
+				(*ssqd_area)->response.code,
+				schid->ssid, schid->sch_no);
+		goto out;
+	}
+	if (!((*ssqd_area)->flags & CHSC_FLAG_QDIO_CAPABILITY) ||
+	    !((*ssqd_area)->flags & CHSC_FLAG_VALIDITY) ||
+	    ((*ssqd_area)->sch != schid->sch_no)) {
+		QDIO_PRINT_WARN("huh? problems checking out sch 0.%x.%x... " \
+				"using all SIGAs.\n",
+				schid->ssid, schid->sch_no);
+		goto out;
+	}
+	return 0;
+out:
+	return -EINVAL;
+}
+
+int
+qdio_get_ssqd_pct(struct ccw_device *cdev)
+{
+	struct qdio_chsc_ssqd *ssqd_area;
+	struct subchannel_id schid;
+	char dbf_text[15];
+	int rc;
+	int pct = 0;
+
+	QDIO_DBF_TEXT0(0, setup, "getpct");
+	schid = ccw_device_get_subchannel_id(cdev);
+	rc = qdio_get_ssqd_information(&schid, &ssqd_area);
+	if (!rc)
+		pct = (int)ssqd_area->pct;
+	if (rc != -ENOMEM)
+		mempool_free(ssqd_area, qdio_mempool_scssc);
+	sprintf(dbf_text, "pct: %d", pct);
+	QDIO_DBF_TEXT2(0, setup, dbf_text);
+	return pct;
+}
+EXPORT_SYMBOL(qdio_get_ssqd_pct);
+
 static void
-qdio_check_subchannel_qebsm(struct qdio_irq *irq_ptr, unsigned char qdioac,
-			    unsigned long token)
+qdio_check_subchannel_qebsm(struct qdio_irq *irq_ptr, unsigned long token)
 {
 	struct qdio_q *q;
 	int i;
@@ -2227,7 +2296,7 @@
 	char dbf_text[15];
 
 	/*check if QEBSM is disabled */
-	if (!(irq_ptr->is_qebsm) || !(qdioac & 0x01)) {
+	if (!(irq_ptr->is_qebsm) || !(irq_ptr->qdioac & 0x01)) {
 		irq_ptr->is_qebsm  = 0;
 		irq_ptr->sch_token = 0;
 		irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM;
@@ -2256,102 +2325,27 @@
 }
 
 static void
-qdio_get_ssqd_information(struct qdio_irq *irq_ptr)
+qdio_get_ssqd_siga(struct qdio_irq *irq_ptr)
 {
-	int result;
-	unsigned char qdioac;
-	struct {
-		struct chsc_header request;
-		u16 reserved1:10;
-		u16 ssid:2;
-		u16 fmt:4;
-		u16 first_sch;
-		u16 reserved2;
-		u16 last_sch;
-		u32 reserved3;
-		struct chsc_header response;
-		u32 reserved4;
-		u8  flags;
-		u8  reserved5;
-		u16 sch;
-		u8  qfmt;
-		u8  parm;
-		u8  qdioac1;
-		u8  sch_class;
-		u8  reserved7;
-		u8  icnt;
-		u8  reserved8;
-		u8  ocnt;
-		u8 reserved9;
-		u8 mbccnt;
-		u16 qdioac2;
-		u64 sch_token;
-	} *ssqd_area;
+	int rc;
+	struct qdio_chsc_ssqd *ssqd_area;
 
 	QDIO_DBF_TEXT0(0,setup,"getssqd");
-	qdioac = 0;
-	ssqd_area = mempool_alloc(qdio_mempool_scssc, GFP_ATOMIC);
-	if (!ssqd_area) {
-	        QDIO_PRINT_WARN("Could not get memory for chsc. Using all " \
-				"SIGAs for sch x%x.\n", irq_ptr->schid.sch_no);
+	irq_ptr->qdioac = 0;
+	rc = qdio_get_ssqd_information(&irq_ptr->schid, &ssqd_area);
+	if (rc) {
+		QDIO_PRINT_WARN("using all SIGAs for sch x%x.n",
+			irq_ptr->schid.sch_no);
 		irq_ptr->qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
 				  CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
 				  CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */
 		irq_ptr->is_qebsm = 0;
-		irq_ptr->sch_token = 0;
-		irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM;
-		return;
-	}
+	} else
+		irq_ptr->qdioac = ssqd_area->qdioac1;
 
-	ssqd_area->request = (struct chsc_header) {
-		.length = 0x0010,
-		.code   = 0x0024,
-	};
-	ssqd_area->first_sch = irq_ptr->schid.sch_no;
-	ssqd_area->last_sch = irq_ptr->schid.sch_no;
-	ssqd_area->ssid = irq_ptr->schid.ssid;
-	result = chsc(ssqd_area);
-
-	if (result) {
-		QDIO_PRINT_WARN("CHSC returned cc %i. Using all " \
-				"SIGAs for sch 0.%x.%x.\n", result,
-				irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-		qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
-			CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
-			CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */
-		irq_ptr->is_qebsm  = 0;
-		goto out;
-	}
-
-	if (ssqd_area->response.code != QDIO_CHSC_RESPONSE_CODE_OK) {
-		QDIO_PRINT_WARN("response upon checking SIGA needs " \
-				"is 0x%x. Using all SIGAs for sch 0.%x.%x.\n",
-				ssqd_area->response.code,
-				irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-		qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
-			CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
-			CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */
-		irq_ptr->is_qebsm  = 0;
-		goto out;
-	}
-	if (!(ssqd_area->flags & CHSC_FLAG_QDIO_CAPABILITY) ||
-	    !(ssqd_area->flags & CHSC_FLAG_VALIDITY) ||
-	    (ssqd_area->sch != irq_ptr->schid.sch_no)) {
-		QDIO_PRINT_WARN("huh? problems checking out sch 0.%x.%x... " \
-				"using all SIGAs.\n",
-				irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
-		qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
-			CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
-			CHSC_FLAG_SIGA_SYNC_NECESSARY; /* worst case */
-		irq_ptr->is_qebsm  = 0;
-		goto out;
-	}
-	qdioac = ssqd_area->qdioac1;
-out:
-	qdio_check_subchannel_qebsm(irq_ptr, qdioac,
-				    ssqd_area->sch_token);
-	mempool_free(ssqd_area, qdio_mempool_scssc);
-	irq_ptr->qdioac = qdioac;
+	qdio_check_subchannel_qebsm(irq_ptr, ssqd_area->sch_token);
+	if (rc != -ENOMEM)
+		mempool_free(ssqd_area, qdio_mempool_scssc);
 }
 
 static unsigned int
@@ -3227,7 +3221,7 @@
 		return -EIO;
 	}
 
-	qdio_get_ssqd_information(irq_ptr);
+	qdio_get_ssqd_siga(irq_ptr);
 	/* if this gets set once, we're running under VM and can omit SVSes */
 	if (irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY)
 		omit_svs=1;
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index da8a272..c3df6b2 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -406,6 +406,34 @@
 #define CHSC_FLAG_SIGA_SYNC_DONE_ON_THININTS 0x08
 #define CHSC_FLAG_SIGA_SYNC_DONE_ON_OUTB_PCIS 0x04
 
+struct qdio_chsc_ssqd {
+	struct chsc_header request;
+	u16 reserved1:10;
+	u16 ssid:2;
+	u16 fmt:4;
+	u16 first_sch;
+	u16 reserved2;
+	u16 last_sch;
+	u32 reserved3;
+	struct chsc_header response;
+	u32 reserved4;
+	u8  flags;
+	u8  reserved5;
+	u16 sch;
+	u8  qfmt;
+	u8  parm;
+	u8  qdioac1;
+	u8  sch_class;
+	u8  pct;
+	u8  icnt;
+	u8  reserved7;
+	u8  ocnt;
+	u8  reserved8;
+	u8  mbccnt;
+	u16 qdioac2;
+	u64 sch_token;
+};
+
 struct qdio_perf_stats {
 #ifdef CONFIG_64BIT
 	atomic64_t tl_runs;
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 7b0b819..a1ab3e3 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -45,7 +45,7 @@
 static void ap_poll_thread_stop(void);
 static void ap_request_timeout(unsigned long);
 
-/**
+/*
  * Module description.
  */
 MODULE_AUTHOR("IBM Corporation");
@@ -53,7 +53,7 @@
 		   "Copyright 2006 IBM Corporation");
 MODULE_LICENSE("GPL");
 
-/**
+/*
  * Module parameter
  */
 int ap_domain_index = -1;	/* Adjunct Processor Domain Index */
@@ -69,7 +69,7 @@
 static DEFINE_SPINLOCK(ap_device_lock);
 static LIST_HEAD(ap_device_list);
 
-/**
+/*
  * Workqueue & timer for bus rescan.
  */
 static struct workqueue_struct *ap_work_queue;
@@ -77,7 +77,7 @@
 static int ap_config_time = AP_CONFIG_TIME;
 static DECLARE_WORK(ap_config_work, ap_scan_bus);
 
-/**
+/*
  * Tasklet & timer for AP request polling.
  */
 static struct timer_list ap_poll_timer = TIMER_INITIALIZER(ap_poll_timeout,0,0);
@@ -88,9 +88,9 @@
 static DEFINE_MUTEX(ap_poll_thread_mutex);
 
 /**
- * Test if ap instructions are available.
+ * ap_intructions_available() - Test if AP instructions are available.
  *
- * Returns 0 if the ap instructions are installed.
+ * Returns 0 if the AP instructions are installed.
  */
 static inline int ap_instructions_available(void)
 {
@@ -108,12 +108,12 @@
 }
 
 /**
- * Test adjunct processor queue.
- * @qid: the ap queue number
- * @queue_depth: pointer to queue depth value
- * @device_type: pointer to device type value
+ * ap_test_queue(): Test adjunct processor queue.
+ * @qid: The AP queue number
+ * @queue_depth: Pointer to queue depth value
+ * @device_type: Pointer to device type value
  *
- * Returns ap queue status structure.
+ * Returns AP queue status structure.
  */
 static inline struct ap_queue_status
 ap_test_queue(ap_qid_t qid, int *queue_depth, int *device_type)
@@ -130,10 +130,10 @@
 }
 
 /**
- * Reset adjunct processor queue.
- * @qid: the ap queue number
+ * ap_reset_queue(): Reset adjunct processor queue.
+ * @qid: The AP queue number
  *
- * Returns ap queue status structure.
+ * Returns AP queue status structure.
  */
 static inline struct ap_queue_status ap_reset_queue(ap_qid_t qid)
 {
@@ -148,16 +148,14 @@
 }
 
 /**
- * Send message to adjunct processor queue.
- * @qid: the ap queue number
- * @psmid: the program supplied message identifier
- * @msg: the message text
- * @length: the message length
+ * __ap_send(): Send message to adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: The program supplied message identifier
+ * @msg: The message text
+ * @length: The message length
  *
- * Returns ap queue status structure.
- *
+ * Returns AP queue status structure.
  * Condition code 1 on NQAP can't happen because the L bit is 1.
- *
  * Condition code 2 on NQAP also means the send is incomplete,
  * because a segment boundary was reached. The NQAP is repeated.
  */
@@ -198,23 +196,20 @@
 }
 EXPORT_SYMBOL(ap_send);
 
-/*
- * Receive message from adjunct processor queue.
- * @qid: the ap queue number
- * @psmid: pointer to program supplied message identifier
- * @msg: the message text
- * @length: the message length
+/**
+ * __ap_recv(): Receive message from adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: Pointer to program supplied message identifier
+ * @msg: The message text
+ * @length: The message length
  *
- * Returns ap queue status structure.
- *
+ * Returns AP queue status structure.
  * Condition code 1 on DQAP means the receive has taken place
  * but only partially.	The response is incomplete, hence the
  * DQAP is repeated.
- *
  * Condition code 2 on DQAP also means the receive is incomplete,
  * this time because a segment boundary was reached. Again, the
  * DQAP is repeated.
- *
  * Note that gpr2 is used by the DQAP instruction to keep track of
  * any 'residual' length, in case the instruction gets interrupted.
  * Hence it gets zeroed before the instruction.
@@ -263,11 +258,12 @@
 EXPORT_SYMBOL(ap_recv);
 
 /**
- * Check if an AP queue is available. The test is repeated for
- * AP_MAX_RESET times.
- * @qid: the ap queue number
- * @queue_depth: pointer to queue depth value
- * @device_type: pointer to device type value
+ * ap_query_queue(): Check if an AP queue is available.
+ * @qid: The AP queue number
+ * @queue_depth: Pointer to queue depth value
+ * @device_type: Pointer to device type value
+ *
+ * The test is repeated for AP_MAX_RESET times.
  */
 static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type)
 {
@@ -308,8 +304,10 @@
 }
 
 /**
+ * ap_init_queue(): Reset an AP queue.
+ * @qid: The AP queue number
+ *
  * Reset an AP queue and wait for it to become available again.
- * @qid: the ap queue number
  */
 static int ap_init_queue(ap_qid_t qid)
 {
@@ -346,7 +344,10 @@
 }
 
 /**
- * Arm request timeout if a AP device was idle and a new request is submitted.
+ * ap_increase_queue_count(): Arm request timeout.
+ * @ap_dev: Pointer to an AP device.
+ *
+ * Arm request timeout if an AP device was idle and a new request is submitted.
  */
 static void ap_increase_queue_count(struct ap_device *ap_dev)
 {
@@ -360,7 +361,10 @@
 }
 
 /**
- * AP device is still alive, re-schedule request timeout if there are still
+ * ap_decrease_queue_count(): Decrease queue count.
+ * @ap_dev: Pointer to an AP device.
+ *
+ * If AP device is still alive, re-schedule request timeout if there are still
  * pending requests.
  */
 static void ap_decrease_queue_count(struct ap_device *ap_dev)
@@ -371,7 +375,7 @@
 	if (ap_dev->queue_count > 0)
 		mod_timer(&ap_dev->timeout, jiffies + timeout);
 	else
-		/**
+		/*
 		 * The timeout timer should to be disabled now - since
 		 * del_timer_sync() is very expensive, we just tell via the
 		 * reset flag to ignore the pending timeout timer.
@@ -379,7 +383,7 @@
 		ap_dev->reset = AP_RESET_IGNORE;
 }
 
-/**
+/*
  * AP device related attributes.
  */
 static ssize_t ap_hwtype_show(struct device *dev,
@@ -433,6 +437,10 @@
 };
 
 /**
+ * ap_bus_match()
+ * @dev: Pointer to device
+ * @drv: Pointer to device_driver
+ *
  * AP bus driver registration/unregistration.
  */
 static int ap_bus_match(struct device *dev, struct device_driver *drv)
@@ -441,7 +449,7 @@
 	struct ap_driver *ap_drv = to_ap_drv(drv);
 	struct ap_device_id *id;
 
-	/**
+	/*
 	 * Compare device type of the device with the list of
 	 * supported types of the device_driver.
 	 */
@@ -455,8 +463,12 @@
 }
 
 /**
- * uevent function for AP devices. It sets up a single environment
- * variable DEV_TYPE which contains the hardware device type.
+ * ap_uevent(): Uevent function for AP devices.
+ * @dev: Pointer to device
+ * @env: Pointer to kobj_uevent_env
+ *
+ * It sets up a single environment variable DEV_TYPE which contains the
+ * hardware device type.
  */
 static int ap_uevent (struct device *dev, struct kobj_uevent_env *env)
 {
@@ -500,8 +512,10 @@
 }
 
 /**
+ * __ap_flush_queue(): Flush requests.
+ * @ap_dev: Pointer to the AP device
+ *
  * Flush all requests from the request/pending queue of an AP device.
- * @ap_dev: pointer to the AP device.
  */
 static void __ap_flush_queue(struct ap_device *ap_dev)
 {
@@ -565,7 +579,7 @@
 }
 EXPORT_SYMBOL(ap_driver_unregister);
 
-/**
+/*
  * AP bus attributes.
  */
 static ssize_t ap_domain_show(struct bus_type *bus, char *buf)
@@ -630,14 +644,16 @@
 };
 
 /**
- * Pick one of the 16 ap domains.
+ * ap_select_domain(): Select an AP domain.
+ *
+ * Pick one of the 16 AP domains.
  */
 static int ap_select_domain(void)
 {
 	int queue_depth, device_type, count, max_count, best_domain;
 	int rc, i, j;
 
-	/**
+	/*
 	 * We want to use a single domain. Either the one specified with
 	 * the "domain=" parameter or the domain with the maximum number
 	 * of devices.
@@ -669,8 +685,10 @@
 }
 
 /**
- * Find the device type if query queue returned a device type of 0.
+ * ap_probe_device_type(): Find the device type of an AP.
  * @ap_dev: pointer to the AP device.
+ *
+ * Find the device type if query queue returned a device type of 0.
  */
 static int ap_probe_device_type(struct ap_device *ap_dev)
 {
@@ -764,7 +782,11 @@
 }
 
 /**
- * Scan the ap bus for new devices.
+ * __ap_scan_bus(): Scan the AP bus.
+ * @dev: Pointer to device
+ * @data: Pointer to data
+ *
+ * Scan the AP bus for new devices.
  */
 static int __ap_scan_bus(struct device *dev, void *data)
 {
@@ -867,6 +889,8 @@
 }
 
 /**
+ * ap_schedule_poll_timer(): Schedule poll timer.
+ *
  * Set up the timer to run the poll tasklet
  */
 static inline void ap_schedule_poll_timer(void)
@@ -877,10 +901,11 @@
 }
 
 /**
- * Receive pending reply messages from an AP device.
+ * ap_poll_read(): Receive pending reply messages from an AP device.
  * @ap_dev: pointer to the AP device
  * @flags: pointer to control flags, bit 2^0 is set if another poll is
  *	   required, bit 2^1 is set if the poll timer needs to get armed
+ *
  * Returns 0 if the device is still present, -ENODEV if not.
  */
 static int ap_poll_read(struct ap_device *ap_dev, unsigned long *flags)
@@ -925,10 +950,11 @@
 }
 
 /**
- * Send messages from the request queue to an AP device.
+ * ap_poll_write(): Send messages from the request queue to an AP device.
  * @ap_dev: pointer to the AP device
  * @flags: pointer to control flags, bit 2^0 is set if another poll is
  *	   required, bit 2^1 is set if the poll timer needs to get armed
+ *
  * Returns 0 if the device is still present, -ENODEV if not.
  */
 static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags)
@@ -968,11 +994,13 @@
 }
 
 /**
- * Poll AP device for pending replies and send new messages. If either
- * ap_poll_read or ap_poll_write returns -ENODEV unregister the device.
+ * ap_poll_queue(): Poll AP device for pending replies and send new messages.
  * @ap_dev: pointer to the bus device
  * @flags: pointer to control flags, bit 2^0 is set if another poll is
  *	   required, bit 2^1 is set if the poll timer needs to get armed
+ *
+ * Poll AP device for pending replies and send new messages. If either
+ * ap_poll_read or ap_poll_write returns -ENODEV unregister the device.
  * Returns 0.
  */
 static inline int ap_poll_queue(struct ap_device *ap_dev, unsigned long *flags)
@@ -986,9 +1014,11 @@
 }
 
 /**
- * Queue a message to a device.
+ * __ap_queue_message(): Queue a message to a device.
  * @ap_dev: pointer to the AP device
  * @ap_msg: the message to be queued
+ *
+ * Queue a message to a device. Returns 0 if successful.
  */
 static int __ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
 {
@@ -1055,12 +1085,14 @@
 EXPORT_SYMBOL(ap_queue_message);
 
 /**
+ * ap_cancel_message(): Cancel a crypto request.
+ * @ap_dev: The AP device that has the message queued
+ * @ap_msg: The message that is to be removed
+ *
  * Cancel a crypto request. This is done by removing the request
- * from the devive pendingq or requestq queue. Note that the
+ * from the device pending or request queue. Note that the
  * request stays on the AP queue. When it finishes the message
  * reply will be discarded because the psmid can't be found.
- * @ap_dev: AP device that has the message queued
- * @ap_msg: the message that is to be removed
  */
 void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg)
 {
@@ -1082,7 +1114,10 @@
 EXPORT_SYMBOL(ap_cancel_message);
 
 /**
- * AP receive polling for finished AP requests
+ * ap_poll_timeout(): AP receive polling for finished AP requests.
+ * @unused: Unused variable.
+ *
+ * Schedules the AP tasklet.
  */
 static void ap_poll_timeout(unsigned long unused)
 {
@@ -1090,6 +1125,9 @@
 }
 
 /**
+ * ap_reset(): Reset a not responding AP device.
+ * @ap_dev: Pointer to the AP device
+ *
  * Reset a not responding AP device and move all requests from the
  * pending queue to the request queue.
  */
@@ -1108,11 +1146,6 @@
 		ap_dev->unregistered = 1;
 }
 
-/**
- * Poll all AP devices on the bus in a round robin fashion. Continue
- * polling until bit 2^0 of the control flags is not set. If bit 2^1
- * of the control flags has been set arm the poll timer.
- */
 static int __ap_poll_all(struct ap_device *ap_dev, unsigned long *flags)
 {
 	spin_lock(&ap_dev->lock);
@@ -1126,6 +1159,14 @@
 	return 0;
 }
 
+/**
+ * ap_poll_all(): Poll all AP devices.
+ * @dummy: Unused variable
+ *
+ * Poll all AP devices on the bus in a round robin fashion. Continue
+ * polling until bit 2^0 of the control flags is not set. If bit 2^1
+ * of the control flags has been set arm the poll timer.
+ */
 static void ap_poll_all(unsigned long dummy)
 {
 	unsigned long flags;
@@ -1144,6 +1185,9 @@
 }
 
 /**
+ * ap_poll_thread(): Thread that polls for finished requests.
+ * @data: Unused pointer
+ *
  * AP bus poll thread. The purpose of this thread is to poll for
  * finished requests in a loop if there is a "free" cpu - that is
  * a cpu that doesn't have anything better to do. The polling stops
@@ -1213,7 +1257,10 @@
 }
 
 /**
- * Handling of request timeouts
+ * ap_request_timeout(): Handling of request timeouts
+ * @data: Holds the AP device.
+ *
+ * Handles request timeouts.
  */
 static void ap_request_timeout(unsigned long data)
 {
@@ -1246,7 +1293,9 @@
 };
 
 /**
- * The module initialization code.
+ * ap_module_init(): The module initialization code.
+ *
+ * Initializes the module.
  */
 int __init ap_module_init(void)
 {
@@ -1288,7 +1337,7 @@
 	if (ap_select_domain() == 0)
 		ap_scan_bus(NULL);
 
-	/* Setup the ap bus rescan timer. */
+	/* Setup the AP bus rescan timer. */
 	init_timer(&ap_config_timer);
 	ap_config_timer.function = ap_config_timeout;
 	ap_config_timer.data = 0;
@@ -1325,7 +1374,9 @@
 }
 
 /**
- * The module termination code
+ * ap_modules_exit(): The module termination code
+ *
+ * Terminates the module.
  */
 void ap_module_exit(void)
 {
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 87c2d64..c1e1200 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -50,6 +50,15 @@
 #define AP_QID_QUEUE(_qid) ((_qid) & 15)
 
 /**
+ * structy ap_queue_status - Holds the AP queue status.
+ * @queue_empty: Shows if queue is empty
+ * @replies_waiting: Waiting replies
+ * @queue_full: Is 1 if the queue is full
+ * @pad: A 4 bit pad
+ * @int_enabled: Shows if interrupts are enabled for the AP
+ * @response_conde: Holds the 8 bit response code
+ * @pad2: A 16 bit pad
+ *
  * The ap queue status word is returned by all three AP functions
  * (PQAP, NQAP and DQAP).  There's a set of flags in the first
  * byte, followed by a 1 byte response code.
@@ -75,7 +84,7 @@
 #define AP_RESPONSE_NO_FIRST_PART	0x13
 #define AP_RESPONSE_MESSAGE_TOO_BIG	0x15
 
-/**
+/*
  * Known device types
  */
 #define AP_DEVICE_TYPE_PCICC	3
@@ -84,7 +93,7 @@
 #define AP_DEVICE_TYPE_CEX2A	6
 #define AP_DEVICE_TYPE_CEX2C	7
 
-/**
+/*
  * AP reset flag states
  */
 #define AP_RESET_IGNORE	0	/* request timeout will be ignored */
@@ -152,7 +161,7 @@
 	.dev_type=(dt),					\
 	.match_flags=AP_DEVICE_ID_MATCH_DEVICE_TYPE,
 
-/**
+/*
  * Note: don't use ap_send/ap_recv after using ap_queue_message
  * for the first time. Otherwise the ap message queue will get
  * confused.
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index e3625a4..4d36e80 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -36,10 +36,11 @@
 #include <linux/compat.h>
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
+#include <linux/hw_random.h>
 
 #include "zcrypt_api.h"
 
-/**
+/*
  * Module description.
  */
 MODULE_AUTHOR("IBM Corporation");
@@ -52,7 +53,10 @@
 static int zcrypt_device_count = 0;
 static atomic_t zcrypt_open_count = ATOMIC_INIT(0);
 
-/**
+static int zcrypt_rng_device_add(void);
+static void zcrypt_rng_device_remove(void);
+
+/*
  * Device attributes common for all crypto devices.
  */
 static ssize_t zcrypt_type_show(struct device *dev,
@@ -99,6 +103,9 @@
 };
 
 /**
+ * __zcrypt_increase_preference(): Increase preference of a crypto device.
+ * @zdev: Pointer the crypto device
+ *
  * Move the device towards the head of the device list.
  * Need to be called while holding the zcrypt device list lock.
  * Note: cards with speed_rating of 0 are kept at the end of the list.
@@ -125,6 +132,9 @@
 }
 
 /**
+ * __zcrypt_decrease_preference(): Decrease preference of a crypto device.
+ * @zdev: Pointer to a crypto device.
+ *
  * Move the device towards the tail of the device list.
  * Need to be called while holding the zcrypt device list lock.
  * Note: cards with speed_rating of 0 are kept at the end of the list.
@@ -198,7 +208,10 @@
 EXPORT_SYMBOL(zcrypt_device_free);
 
 /**
- * Register a crypto device.
+ * zcrypt_device_register() - Register a crypto device.
+ * @zdev: Pointer to a crypto device
+ *
+ * Register a crypto device. Returns 0 if successful.
  */
 int zcrypt_device_register(struct zcrypt_device *zdev)
 {
@@ -216,16 +229,37 @@
 	__zcrypt_increase_preference(zdev);
 	zcrypt_device_count++;
 	spin_unlock_bh(&zcrypt_device_lock);
+	if (zdev->ops->rng) {
+		rc = zcrypt_rng_device_add();
+		if (rc)
+			goto out_unregister;
+	}
+	return 0;
+
+out_unregister:
+	spin_lock_bh(&zcrypt_device_lock);
+	zcrypt_device_count--;
+	list_del_init(&zdev->list);
+	spin_unlock_bh(&zcrypt_device_lock);
+	sysfs_remove_group(&zdev->ap_dev->device.kobj,
+			   &zcrypt_device_attr_group);
+	put_device(&zdev->ap_dev->device);
+	zcrypt_device_put(zdev);
 out:
 	return rc;
 }
 EXPORT_SYMBOL(zcrypt_device_register);
 
 /**
+ * zcrypt_device_unregister(): Unregister a crypto device.
+ * @zdev: Pointer to crypto device
+ *
  * Unregister a crypto device.
  */
 void zcrypt_device_unregister(struct zcrypt_device *zdev)
 {
+	if (zdev->ops->rng)
+		zcrypt_rng_device_remove();
 	spin_lock_bh(&zcrypt_device_lock);
 	zcrypt_device_count--;
 	list_del_init(&zdev->list);
@@ -238,7 +272,9 @@
 EXPORT_SYMBOL(zcrypt_device_unregister);
 
 /**
- * zcrypt_read is not be supported beyond zcrypt 1.3.1
+ * zcrypt_read (): Not supported beyond zcrypt 1.3.1.
+ *
+ * This function is not supported beyond zcrypt 1.3.1.
  */
 static ssize_t zcrypt_read(struct file *filp, char __user *buf,
 			   size_t count, loff_t *f_pos)
@@ -247,6 +283,8 @@
 }
 
 /**
+ * zcrypt_write(): Not allowed.
+ *
  * Write is is not allowed
  */
 static ssize_t zcrypt_write(struct file *filp, const char __user *buf,
@@ -256,7 +294,9 @@
 }
 
 /**
- * Device open/close functions to count number of users.
+ * zcrypt_open(): Count number of users.
+ *
+ * Device open function to count number of users.
  */
 static int zcrypt_open(struct inode *inode, struct file *filp)
 {
@@ -264,13 +304,18 @@
 	return 0;
 }
 
+/**
+ * zcrypt_release(): Count number of users.
+ *
+ * Device close function to count number of users.
+ */
 static int zcrypt_release(struct inode *inode, struct file *filp)
 {
 	atomic_dec(&zcrypt_open_count);
 	return 0;
 }
 
-/**
+/*
  * zcrypt ioctls.
  */
 static long zcrypt_rsa_modexpo(struct ica_rsa_modexpo *mex)
@@ -280,7 +325,7 @@
 
 	if (mex->outputdatalength < mex->inputdatalength)
 		return -EINVAL;
-	/**
+	/*
 	 * As long as outputdatalength is big enough, we can set the
 	 * outputdatalength equal to the inputdatalength, since that is the
 	 * number of bytes we will copy in any case
@@ -326,7 +371,7 @@
 	if (crt->outputdatalength < crt->inputdatalength ||
 	    (crt->inputdatalength & 1))
 		return -EINVAL;
-	/**
+	/*
 	 * As long as outputdatalength is big enough, we can set the
 	 * outputdatalength equal to the inputdatalength, since that is the
 	 * number of bytes we will copy in any case
@@ -343,7 +388,7 @@
 		    zdev->max_mod_size < crt->inputdatalength)
 			continue;
 		if (zdev->short_crt && crt->inputdatalength > 240) {
-			/**
+			/*
 			 * Check inputdata for leading zeros for cards
 			 * that can't handle np_prime, bp_key, or
 			 * u_mult_inv > 128 bytes.
@@ -359,7 +404,7 @@
 				    copy_from_user(&z3, crt->u_mult_inv, len))
 					return -EFAULT;
 				copied = 1;
-				/**
+				/*
 				 * We have to restart device lookup -
 				 * the device list may have changed by now.
 				 */
@@ -427,6 +472,37 @@
 	return -ENODEV;
 }
 
+static long zcrypt_rng(char *buffer)
+{
+	struct zcrypt_device *zdev;
+	int rc;
+
+	spin_lock_bh(&zcrypt_device_lock);
+	list_for_each_entry(zdev, &zcrypt_device_list, list) {
+		if (!zdev->online || !zdev->ops->rng)
+			continue;
+		zcrypt_device_get(zdev);
+		get_device(&zdev->ap_dev->device);
+		zdev->request_count++;
+		__zcrypt_decrease_preference(zdev);
+		if (try_module_get(zdev->ap_dev->drv->driver.owner)) {
+			spin_unlock_bh(&zcrypt_device_lock);
+			rc = zdev->ops->rng(zdev, buffer);
+			spin_lock_bh(&zcrypt_device_lock);
+			module_put(zdev->ap_dev->drv->driver.owner);
+		} else
+			rc = -EAGAIN;
+		zdev->request_count--;
+		__zcrypt_increase_preference(zdev);
+		put_device(&zdev->ap_dev->device);
+		zcrypt_device_put(zdev);
+		spin_unlock_bh(&zcrypt_device_lock);
+		return rc;
+	}
+	spin_unlock_bh(&zcrypt_device_lock);
+	return -ENODEV;
+}
+
 static void zcrypt_status_mask(char status[AP_DEVICES])
 {
 	struct zcrypt_device *zdev;
@@ -514,6 +590,8 @@
 }
 
 /**
+ * zcrypt_ica_status(): Old, depracted combi status call.
+ *
  * Old, deprecated combi status call.
  */
 static long zcrypt_ica_status(struct file *filp, unsigned long arg)
@@ -615,7 +693,7 @@
 				(int __user *) arg);
 	case Z90STAT_DOMAIN_INDEX:
 		return put_user(ap_domain_index, (int __user *) arg);
-	/**
+	/*
 	 * Deprecated ioctls. Don't add another device count ioctl,
 	 * you can count them yourself in the user space with the
 	 * output of the Z90STAT_STATUS_MASK ioctl.
@@ -653,7 +731,7 @@
 }
 
 #ifdef CONFIG_COMPAT
-/**
+/*
  * ioctl32 conversion routines
  */
 struct compat_ica_rsa_modexpo {
@@ -804,7 +882,7 @@
 }
 #endif
 
-/**
+/*
  * Misc device file operations.
  */
 static const struct file_operations zcrypt_fops = {
@@ -819,7 +897,7 @@
 	.release	= zcrypt_release
 };
 
-/**
+/*
  * Misc device.
  */
 static struct miscdevice zcrypt_misc_device = {
@@ -828,7 +906,7 @@
 	.fops	    = &zcrypt_fops,
 };
 
-/**
+/*
  * Deprecated /proc entry support.
  */
 static struct proc_dir_entry *zcrypt_entry;
@@ -1022,7 +1100,7 @@
 	}
 
 	for (j = 0; j < 64 && *ptr; ptr++) {
-		/**
+		/*
 		 * '0' for no device, '1' for PCICA, '2' for PCICC,
 		 * '3' for PCIXCC_MCL2, '4' for PCIXCC_MCL3,
 		 * '5' for CEX2C and '6' for CEX2A'
@@ -1041,7 +1119,76 @@
 	return count;
 }
 
+static int zcrypt_rng_device_count;
+static u32 *zcrypt_rng_buffer;
+static int zcrypt_rng_buffer_index;
+static DEFINE_MUTEX(zcrypt_rng_mutex);
+
+static int zcrypt_rng_data_read(struct hwrng *rng, u32 *data)
+{
+	int rc;
+
+	/*
+	 * We don't need locking here because the RNG API guarantees serialized
+	 * read method calls.
+	 */
+	if (zcrypt_rng_buffer_index == 0) {
+		rc = zcrypt_rng((char *) zcrypt_rng_buffer);
+		if (rc < 0)
+			return -EIO;
+		zcrypt_rng_buffer_index = rc / sizeof *data;
+	}
+	*data = zcrypt_rng_buffer[--zcrypt_rng_buffer_index];
+	return sizeof *data;
+}
+
+static struct hwrng zcrypt_rng_dev = {
+	.name		= "zcrypt",
+	.data_read	= zcrypt_rng_data_read,
+};
+
+static int zcrypt_rng_device_add(void)
+{
+	int rc = 0;
+
+	mutex_lock(&zcrypt_rng_mutex);
+	if (zcrypt_rng_device_count == 0) {
+		zcrypt_rng_buffer = (u32 *) get_zeroed_page(GFP_KERNEL);
+		if (!zcrypt_rng_buffer) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		zcrypt_rng_buffer_index = 0;
+		rc = hwrng_register(&zcrypt_rng_dev);
+		if (rc)
+			goto out_free;
+		zcrypt_rng_device_count = 1;
+	} else
+		zcrypt_rng_device_count++;
+	mutex_unlock(&zcrypt_rng_mutex);
+	return 0;
+
+out_free:
+	free_page((unsigned long) zcrypt_rng_buffer);
+out:
+	mutex_unlock(&zcrypt_rng_mutex);
+	return rc;
+}
+
+static void zcrypt_rng_device_remove(void)
+{
+	mutex_lock(&zcrypt_rng_mutex);
+	zcrypt_rng_device_count--;
+	if (zcrypt_rng_device_count == 0) {
+		hwrng_unregister(&zcrypt_rng_dev);
+		free_page((unsigned long) zcrypt_rng_buffer);
+	}
+	mutex_unlock(&zcrypt_rng_mutex);
+}
+
 /**
+ * zcrypt_api_init(): Module initialization.
+ *
  * The module initialization code.
  */
 int __init zcrypt_api_init(void)
@@ -1076,6 +1223,8 @@
 }
 
 /**
+ * zcrypt_api_exit(): Module termination.
+ *
  * The module termination code.
  */
 void zcrypt_api_exit(void)
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index de4877e..5c6e222 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -43,17 +43,17 @@
 #define DEV_NAME	"zcrypt"
 
 #define PRINTK(fmt, args...) \
-	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __func__ , ## args)
 #define PRINTKN(fmt, args...) \
 	printk(KERN_DEBUG DEV_NAME ": " fmt, ## args)
 #define PRINTKW(fmt, args...) \
-	printk(KERN_WARNING DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+	printk(KERN_WARNING DEV_NAME ": %s -> " fmt, __func__ , ## args)
 #define PRINTKC(fmt, args...) \
-	printk(KERN_CRIT DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+	printk(KERN_CRIT DEV_NAME ": %s -> " fmt, __func__ , ## args)
 
 #ifdef ZCRYPT_DEBUG
 #define PDEBUG(fmt, args...) \
-	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __FUNCTION__ , ## args)
+	printk(KERN_DEBUG DEV_NAME ": %s -> " fmt, __func__ , ## args)
 #else
 #define PDEBUG(fmt, args...) do {} while (0)
 #endif
@@ -100,6 +100,13 @@
 #define ZCRYPT_CEX2C		5
 #define ZCRYPT_CEX2A		6
 
+/**
+ * Large random numbers are pulled in 4096 byte chunks from the crypto cards
+ * and stored in a page. Be carefull when increasing this buffer due to size
+ * limitations for AP requests.
+ */
+#define ZCRYPT_RNG_BUFFER_SIZE	4096
+
 struct zcrypt_device;
 
 struct zcrypt_ops {
@@ -107,6 +114,7 @@
 	long (*rsa_modexpo_crt)(struct zcrypt_device *,
 				struct ica_rsa_modexpo_crt *);
 	long (*send_cprb)(struct zcrypt_device *, struct ica_xcRB *);
+	long (*rng)(struct zcrypt_device *, char *);
 };
 
 struct zcrypt_device {
diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h
index 8dbcf0e..ed82f2f 100644
--- a/drivers/s390/crypto/zcrypt_cca_key.h
+++ b/drivers/s390/crypto/zcrypt_cca_key.h
@@ -174,7 +174,7 @@
 	key->pvtMeHdr = static_pvt_me_hdr;
 	key->pvtMeSec = static_pvt_me_sec;
 	key->pubMeSec = static_pub_me_sec;
-	/**
+	/*
 	 * In a private key, the modulus doesn't appear in the public
 	 * section. So, an arbitrary public exponent of 0x010001 will be
 	 * used.
@@ -338,7 +338,7 @@
 	pub = (struct cca_public_sec *)(key->key_parts + key_len);
 	*pub = static_cca_pub_sec;
 	pub->modulus_bit_len = 8 * crt->inputdatalength;
-	/**
+	/*
 	 * In a private key, the modulus doesn't appear in the public
 	 * section. So, an arbitrary public exponent of 0x010001 will be
 	 * used.
diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h
index 2cb616b..3e27fe7 100644
--- a/drivers/s390/crypto/zcrypt_error.h
+++ b/drivers/s390/crypto/zcrypt_error.h
@@ -108,7 +108,7 @@
 		return -EINVAL;
 	case REP82_ERROR_MESSAGE_TYPE:
 	//   REP88_ERROR_MESSAGE_TYPE		// '20' CEX2A
-		/**
+		/*
 		 * To sent a message of the wrong type is a bug in the
 		 * device driver. Warn about it, disable the device
 		 * and then repeat the request.
diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c
index d6d59bf..17ea56c 100644
--- a/drivers/s390/crypto/zcrypt_pcicc.c
+++ b/drivers/s390/crypto/zcrypt_pcicc.c
@@ -42,7 +42,7 @@
 #define PCICC_MAX_MOD_SIZE_OLD	128	/* 1024 bits */
 #define PCICC_MAX_MOD_SIZE	256	/* 2048 bits */
 
-/**
+/*
  * PCICC cards need a speed rating of 0. This keeps them at the end of
  * the zcrypt device list (see zcrypt_api.c). PCICC cards are only
  * used if no other cards are present because they are slow and can only
@@ -388,7 +388,7 @@
 	reply_len = le16_to_cpu(msg->length) - 2;
 	if (reply_len > outputdatalength)
 		return -EINVAL;
-	/**
+	/*
 	 * For all encipher requests, the length of the ciphertext (reply_len)
 	 * will always equal the modulus length. For MEX decipher requests
 	 * the output needs to get padded. Minimum pad size is 10.
diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c
index 70b9ddc..0bc9b31 100644
--- a/drivers/s390/crypto/zcrypt_pcixcc.c
+++ b/drivers/s390/crypto/zcrypt_pcixcc.c
@@ -356,6 +356,55 @@
 }
 
 /**
+ * Prepare a type6 CPRB message for random number generation
+ *
+ * @ap_dev: AP device pointer
+ * @ap_msg: pointer to AP message
+ */
+static void rng_type6CPRB_msgX(struct ap_device *ap_dev,
+			       struct ap_message *ap_msg,
+			       unsigned random_number_length)
+{
+	struct {
+		struct type6_hdr hdr;
+		struct CPRBX cprbx;
+		char function_code[2];
+		short int rule_length;
+		char rule[8];
+		short int verb_length;
+		short int key_length;
+	} __attribute__((packed)) *msg = ap_msg->message;
+	static struct type6_hdr static_type6_hdrX = {
+		.type		= 0x06,
+		.offset1	= 0x00000058,
+		.agent_id	= {'C', 'A'},
+		.function_code	= {'R', 'L'},
+		.ToCardLen1	= sizeof *msg - sizeof(msg->hdr),
+		.FromCardLen1	= sizeof *msg - sizeof(msg->hdr),
+	};
+	static struct CPRBX static_cprbx = {
+		.cprb_len	= 0x00dc,
+		.cprb_ver_id	= 0x02,
+		.func_id	= {0x54, 0x32},
+		.req_parml	= sizeof *msg - sizeof(msg->hdr) -
+				  sizeof(msg->cprbx),
+		.rpl_msgbl	= sizeof *msg - sizeof(msg->hdr),
+	};
+
+	msg->hdr = static_type6_hdrX;
+	msg->hdr.FromCardLen2 = random_number_length,
+	msg->cprbx = static_cprbx;
+	msg->cprbx.rpl_datal = random_number_length,
+	msg->cprbx.domain = AP_QID_QUEUE(ap_dev->qid);
+	memcpy(msg->function_code, msg->hdr.function_code, 0x02);
+	msg->rule_length = 0x0a;
+	memcpy(msg->rule, "RANDOM  ", 8);
+	msg->verb_length = 0x02;
+	msg->key_length = 0x02;
+	ap_msg->length = sizeof *msg;
+}
+
+/**
  * Copy results from a type 86 ICA reply message back to user space.
  *
  * @zdev: crypto device pointer
@@ -452,7 +501,7 @@
 	reply_len = msg->length - 2;
 	if (reply_len > outputdatalength)
 		return -EINVAL;
-	/**
+	/*
 	 * For all encipher requests, the length of the ciphertext (reply_len)
 	 * will always equal the modulus length. For MEX decipher requests
 	 * the output needs to get padded. Minimum pad size is 10.
@@ -509,6 +558,26 @@
 	return 0;
 }
 
+static int convert_type86_rng(struct zcrypt_device *zdev,
+			  struct ap_message *reply,
+			  char *buffer)
+{
+	struct {
+		struct type86_hdr hdr;
+		struct type86_fmt2_ext fmt2;
+		struct CPRBX cprbx;
+	} __attribute__((packed)) *msg = reply->message;
+	char *data = reply->message;
+
+	if (msg->cprbx.ccp_rtcode != 0 || msg->cprbx.ccp_rscode != 0) {
+		PDEBUG("RNG response error on PCIXCC/CEX2C rc=%hu/rs=%hu\n",
+		       rc, rs);
+		return -EINVAL;
+	}
+	memcpy(buffer, data + msg->fmt2.offset2, msg->fmt2.count2);
+	return msg->fmt2.count2;
+}
+
 static int convert_response_ica(struct zcrypt_device *zdev,
 			    struct ap_message *reply,
 			    char __user *outputdata,
@@ -567,6 +636,31 @@
 	}
 }
 
+static int convert_response_rng(struct zcrypt_device *zdev,
+				 struct ap_message *reply,
+				 char *data)
+{
+	struct type86x_reply *msg = reply->message;
+
+	switch (msg->hdr.type) {
+	case TYPE82_RSP_CODE:
+	case TYPE88_RSP_CODE:
+		return -EINVAL;
+	case TYPE86_RSP_CODE:
+		if (msg->hdr.reply_code)
+			return -EINVAL;
+		if (msg->cprbx.cprb_ver_id == 0x02)
+			return convert_type86_rng(zdev, reply, data);
+		/* no break, incorrect cprb version is an unknown response */
+	default: /* Unknown response type, this should NEVER EVER happen */
+		PRINTK("Unrecognized Message Header: %08x%08x\n",
+		       *(unsigned int *) reply->message,
+		       *(unsigned int *) (reply->message+4));
+		zdev->online = 0;
+		return -EAGAIN;	/* repeat the request on a different device. */
+	}
+}
+
 /**
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
@@ -736,6 +830,42 @@
 }
 
 /**
+ * The request distributor calls this function if it picked the PCIXCC/CEX2C
+ * device to generate random data.
+ * @zdev: pointer to zcrypt_device structure that identifies the
+ *	  PCIXCC/CEX2C device to the request distributor
+ * @buffer: pointer to a memory page to return random data
+ */
+
+static long zcrypt_pcixcc_rng(struct zcrypt_device *zdev,
+				    char *buffer)
+{
+	struct ap_message ap_msg;
+	struct response_type resp_type = {
+		.type = PCIXCC_RESPONSE_TYPE_XCRB,
+	};
+	int rc;
+
+	ap_msg.message = kmalloc(PCIXCC_MAX_XCRB_MESSAGE_SIZE, GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+	ap_msg.psmid = (((unsigned long long) current->pid) << 32) +
+				atomic_inc_return(&zcrypt_step);
+	ap_msg.private = &resp_type;
+	rng_type6CPRB_msgX(zdev->ap_dev, &ap_msg, ZCRYPT_RNG_BUFFER_SIZE);
+	init_completion(&resp_type.work);
+	ap_queue_message(zdev->ap_dev, &ap_msg);
+	rc = wait_for_completion_interruptible(&resp_type.work);
+	if (rc == 0)
+		rc = convert_response_rng(zdev, &ap_msg, buffer);
+	else
+		/* Signal pending. */
+		ap_cancel_message(zdev->ap_dev, &ap_msg);
+	kfree(ap_msg.message);
+	return rc;
+}
+
+/**
  * The crypto operations for a PCIXCC/CEX2C card.
  */
 static struct zcrypt_ops zcrypt_pcixcc_ops = {
@@ -744,6 +874,13 @@
 	.send_cprb = zcrypt_pcixcc_send_cprb,
 };
 
+static struct zcrypt_ops zcrypt_pcixcc_with_rng_ops = {
+	.rsa_modexpo = zcrypt_pcixcc_modexpo,
+	.rsa_modexpo_crt = zcrypt_pcixcc_modexpo_crt,
+	.send_cprb = zcrypt_pcixcc_send_cprb,
+	.rng = zcrypt_pcixcc_rng,
+};
+
 /**
  * Micro-code detection function. Its sends a message to a pcixcc card
  * to find out the microcode level.
@@ -859,6 +996,58 @@
 }
 
 /**
+ * Large random number detection function. Its sends a message to a pcixcc
+ * card to find out if large random numbers are supported.
+ * @ap_dev: pointer to the AP device.
+ *
+ * Returns 1 if large random numbers are supported, 0 if not and < 0 on error.
+ */
+static int zcrypt_pcixcc_rng_supported(struct ap_device *ap_dev)
+{
+	struct ap_message ap_msg;
+	unsigned long long psmid;
+	struct {
+		struct type86_hdr hdr;
+		struct type86_fmt2_ext fmt2;
+		struct CPRBX cprbx;
+	} __attribute__((packed)) *reply;
+	int rc, i;
+
+	ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!ap_msg.message)
+		return -ENOMEM;
+
+	rng_type6CPRB_msgX(ap_dev, &ap_msg, 4);
+	rc = ap_send(ap_dev->qid, 0x0102030405060708ULL, ap_msg.message,
+		     ap_msg.length);
+	if (rc)
+		goto out_free;
+
+	/* Wait for the test message to complete. */
+	for (i = 0; i < 2 * HZ; i++) {
+		msleep(1000 / HZ);
+		rc = ap_recv(ap_dev->qid, &psmid, ap_msg.message, 4096);
+		if (rc == 0 && psmid == 0x0102030405060708ULL)
+			break;
+	}
+
+	if (i >= 2 * HZ) {
+		/* Got no answer. */
+		rc = -ENODEV;
+		goto out_free;
+	}
+
+	reply = ap_msg.message;
+	if (reply->cprbx.ccp_rtcode == 0 && reply->cprbx.ccp_rscode == 0)
+		rc = 1;
+	else
+		rc = 0;
+out_free:
+	free_page((unsigned long) ap_msg.message);
+	return rc;
+}
+
+/**
  * Probe function for PCIXCC/CEX2C cards. It always accepts the AP device
  * since the bus_match already checked the hardware type. The PCIXCC
  * cards come in two flavours: micro code level 2 and micro code level 3.
@@ -874,7 +1063,6 @@
 	if (!zdev)
 		return -ENOMEM;
 	zdev->ap_dev = ap_dev;
-	zdev->ops = &zcrypt_pcixcc_ops;
 	zdev->online = 1;
 	if (ap_dev->device_type == AP_DEVICE_TYPE_PCIXCC) {
 		rc = zcrypt_pcixcc_mcl(ap_dev);
@@ -901,6 +1089,15 @@
 		zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE;
 		zdev->max_mod_size = PCIXCC_MAX_MOD_SIZE;
 	}
+	rc = zcrypt_pcixcc_rng_supported(ap_dev);
+	if (rc < 0) {
+		zcrypt_device_free(zdev);
+		return rc;
+	}
+	if (rc)
+		zdev->ops = &zcrypt_pcixcc_with_rng_ops;
+	else
+		zdev->ops = &zcrypt_pcixcc_ops;
 	ap_dev->reply = &zdev->reply;
 	ap_dev->private = zdev;
 	rc = zcrypt_device_register(zdev);
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index d8a5c22..04a1d7b 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -299,7 +299,7 @@
 	struct claw_privbk *privptr=NULL;
 
 #ifdef FUNCTRACE
-	printk(KERN_INFO "%s Enter\n",__FUNCTION__);
+	printk(KERN_INFO "%s Enter\n",__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"probe");
 	if (!get_device(&cgdev->dev))
@@ -313,7 +313,7 @@
 		probe_error(cgdev);
 		put_device(&cgdev->dev);
 		printk(KERN_WARNING "Out of memory %s %s Exit Line %d \n",
-			cgdev->cdev[0]->dev.bus_id,__FUNCTION__,__LINE__);
+			cgdev->cdev[0]->dev.bus_id,__func__,__LINE__);
 		CLAW_DBF_TEXT_(2,setup,"probex%d",-ENOMEM);
 		return -ENOMEM;
 	}
@@ -323,7 +323,7 @@
                 probe_error(cgdev);
 		put_device(&cgdev->dev);
 		printk(KERN_WARNING "Out of memory %s %s Exit Line %d \n",
-			cgdev->cdev[0]->dev.bus_id,__FUNCTION__,__LINE__);
+			cgdev->cdev[0]->dev.bus_id,__func__,__LINE__);
 		CLAW_DBF_TEXT_(2,setup,"probex%d",-ENOMEM);
                 return -ENOMEM;
         }
@@ -340,7 +340,7 @@
 		probe_error(cgdev);
 		put_device(&cgdev->dev);
 		printk(KERN_WARNING "add_files failed %s %s Exit Line %d \n",
-			cgdev->cdev[0]->dev.bus_id,__FUNCTION__,__LINE__);
+			cgdev->cdev[0]->dev.bus_id,__func__,__LINE__);
 		CLAW_DBF_TEXT_(2,setup,"probex%d",rc);
 		return rc;
 	}
@@ -351,7 +351,7 @@
 	cgdev->dev.driver_data = privptr;
 #ifdef FUNCTRACE
         printk(KERN_INFO "claw:%s exit on line %d, "
-		"rc = 0\n",__FUNCTION__,__LINE__);
+		"rc = 0\n",__func__,__LINE__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"prbext 0");
 
@@ -371,7 +371,7 @@
         struct chbk *p_ch;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s enter\n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"claw_tx");
         p_ch=&privptr->channel[WRITE];
@@ -381,7 +381,7 @@
                 privptr->stats.tx_dropped++;
 #ifdef FUNCTRACE
                 printk(KERN_INFO "%s: %s() exit on line %d, rc = EIO\n",
-			dev->name,__FUNCTION__, __LINE__);
+			dev->name,__func__, __LINE__);
 #endif
 		CLAW_DBF_TEXT_(2,trace,"clawtx%d",-EIO);
                 return -EIO;
@@ -398,7 +398,7 @@
         spin_unlock_irqrestore(get_ccwdev_lock(p_ch->cdev), saveflags);
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s exit on line %d, rc = %d\n",
-		dev->name, __FUNCTION__, __LINE__, rc);
+		dev->name, __func__, __LINE__, rc);
 #endif
 	CLAW_DBF_TEXT_(4,trace,"clawtx%d",rc);
         return rc;
@@ -460,7 +460,7 @@
 #ifdef IOTRACE
 		printk(KERN_INFO "%s: %s() Packed %d len %d\n",
 			p_env->ndev->name,
-			__FUNCTION__,pkt_cnt,new_skb->len);
+			__func__,pkt_cnt,new_skb->len);
 #endif
 	}
 	CLAW_DBF_TEXT(4,trace,"PackSKBx");
@@ -478,7 +478,7 @@
 	struct claw_privbk  *privptr=dev->priv;
 	int buff_size;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter  \n",dev->name,__func__);
 #endif
 #ifdef DEBUGMSG
         printk(KERN_INFO "variable dev =\n");
@@ -491,14 +491,14 @@
 #ifdef FUNCTRACE
                 printk(KERN_INFO "%s:%s Exit on line %d, rc=EINVAL\n",
 		dev->name,
-		__FUNCTION__, __LINE__);
+		__func__, __LINE__);
 #endif
                 return -EINVAL;
         }
         dev->mtu = new_mtu;
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d\n",dev->name,
-	__FUNCTION__, __LINE__);
+	__func__, __LINE__);
 #endif
         return 0;
 }  /*   end of claw_change_mtu */
@@ -522,7 +522,7 @@
         struct ccwbk *p_buf;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter  \n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"open");
 	if (!dev || (dev->name[0] == 0x00)) {
@@ -537,7 +537,7 @@
         	if (rc) {
                 	printk(KERN_INFO "%s:%s Exit on line %d, rc=ENOMEM\n",
 			dev->name,
-			__FUNCTION__, __LINE__);
+			__func__, __LINE__);
 			CLAW_DBF_TEXT(2,trace,"openmem");
                 	return -ENOMEM;
         	}
@@ -661,7 +661,7 @@
                 claw_clear_busy(dev);
 #ifdef FUNCTRACE
                 printk(KERN_INFO "%s:%s Exit on line %d, rc=EIO\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 		CLAW_DBF_TEXT(2,trace,"open EIO");
                 return -EIO;
@@ -673,7 +673,7 @@
 
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d, rc=0\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"openok");
         return 0;
@@ -696,7 +696,7 @@
 
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s enter  \n",__FUNCTION__);
+	printk(KERN_INFO "%s enter  \n",__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"clawirq");
         /* Bypass all 'unsolicited interrupts' */
@@ -706,7 +706,7 @@
                         cdev->dev.bus_id,irb->scsw.cstat, irb->scsw.dstat);
 #ifdef FUNCTRACE
                 printk(KERN_INFO "claw: %s() "
-			"exit on line %d\n",__FUNCTION__,__LINE__);
+			"exit on line %d\n",__func__,__LINE__);
 #endif
 		CLAW_DBF_TEXT(2,trace,"badirq");
                 return;
@@ -752,7 +752,7 @@
 #endif
 #ifdef FUNCTRACE
 		printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 		CLAW_DBF_TEXT(2,trace,"chanchk");
                 /* return; */
@@ -777,7 +777,7 @@
 	     		(SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)))) {
 #ifdef FUNCTRACE
                                 printk(KERN_INFO "%s:%s Exit on line %d\n",
-					dev->name,__FUNCTION__,__LINE__);
+					dev->name,__func__,__LINE__);
 #endif
                                 return;
                         }
@@ -788,7 +788,7 @@
 #endif
 #ifdef FUNCTRACE
                         printk(KERN_INFO "%s:%s Exit on line %d\n",
-				dev->name,__FUNCTION__,__LINE__);
+				dev->name,__func__,__LINE__);
 #endif
 			CLAW_DBF_TEXT(4,trace,"stop");
                         return;
@@ -804,7 +804,7 @@
 	     		(SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)))) {
 #ifdef FUNCTRACE
 				printk(KERN_INFO "%s:%s Exit on line %d\n",
-					dev->name,__FUNCTION__,__LINE__);
+					dev->name,__func__,__LINE__);
 #endif
 				CLAW_DBF_TEXT(4,trace,"haltio");
                                 return;
@@ -838,7 +838,7 @@
 #endif
 #ifdef FUNCTRACE
                         printk(KERN_INFO "%s:%s Exit on line %d\n",
-				dev->name,__FUNCTION__,__LINE__);
+				dev->name,__func__,__LINE__);
 #endif
 			CLAW_DBF_TEXT(4,trace,"haltio");
                         return;
@@ -858,7 +858,7 @@
                                 }
 #ifdef FUNCTRACE
 				printk(KERN_INFO "%s:%s Exit on line %d\n",
-					dev->name,__FUNCTION__,__LINE__);
+					dev->name,__func__,__LINE__);
 #endif
 					CLAW_DBF_TEXT(4,trace,"notrdy");
                                         return;
@@ -874,7 +874,7 @@
 				}
 #ifdef FUNCTRACE
 				printk(KERN_INFO "%s:%s Exit on line %d\n",
-					dev->name,__FUNCTION__,__LINE__);
+					dev->name,__func__,__LINE__);
 #endif
 				CLAW_DBF_TEXT(4,trace,"PCI_read");
                                 return;
@@ -885,7 +885,7 @@
 	     		 (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)))) {
 #ifdef FUNCTRACE
 				printk(KERN_INFO "%s:%s Exit on line %d\n",
-					dev->name,__FUNCTION__,__LINE__);
+					dev->name,__func__,__LINE__);
 #endif
 				CLAW_DBF_TEXT(4,trace,"SPend_rd");
                                 return;
@@ -906,7 +906,7 @@
 #endif
 #ifdef FUNCTRACE
 			printk(KERN_INFO "%s:%s Exit on line %d\n",
-				dev->name,__FUNCTION__,__LINE__);
+				dev->name,__func__,__LINE__);
 #endif
 			CLAW_DBF_TEXT(4,trace,"RdIRQXit");
                         return;
@@ -929,7 +929,7 @@
                                 }
 #ifdef FUNCTRACE
 				printk(KERN_INFO "%s:%s Exit on line %d\n",
-					dev->name,__FUNCTION__,__LINE__);
+					dev->name,__func__,__LINE__);
 #endif
 				CLAW_DBF_TEXT(4,trace,"rstrtwrt");
                                 return;
@@ -946,7 +946,7 @@
 	     		(SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)))) {
 #ifdef FUNCTRACE
 				printk(KERN_INFO "%s:%s Exit on line %d\n",
-					dev->name,__FUNCTION__,__LINE__);
+					dev->name,__func__,__LINE__);
 #endif
 				CLAW_DBF_TEXT(4,trace,"writeUE");
                                 return;
@@ -969,7 +969,7 @@
 #endif
 #ifdef FUNCTRACE
 			printk(KERN_INFO "%s:%s Exit on line %d\n",
-				dev->name,__FUNCTION__,__LINE__);
+				dev->name,__func__,__LINE__);
 #endif
 			CLAW_DBF_TEXT(4,trace,"StWtExit");
                         return;
@@ -978,7 +978,7 @@
 				"state=%d\n",dev->name,p_ch->claw_state);
 #ifdef FUNCTRACE
 			printk(KERN_INFO "%s:%s Exit on line %d\n",
-				dev->name,__FUNCTION__,__LINE__);
+				dev->name,__func__,__LINE__);
 #endif
 			CLAW_DBF_TEXT(2,trace,"badIRQ");
                         return;
@@ -1001,7 +1001,7 @@
 	p_ch = (struct chbk *) data;
         dev = (struct net_device *)p_ch->ndev;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter  \n",dev->name,__func__);
 #endif
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: variable p_ch =\n",dev->name);
@@ -1021,7 +1021,7 @@
 	CLAW_DBF_TEXT(4,trace,"TskletXt");
 #ifdef FUNCTRACE
 	printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
         return;
 }       /*    end of claw_irq_bh    */
@@ -1048,7 +1048,7 @@
         if (!privptr)
                 return 0;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter  \n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"release");
 #ifdef DEBUGMSG
@@ -1090,7 +1090,7 @@
 	if(privptr->buffs_alloc != 1) {
 #ifdef FUNCTRACE
 	printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 		CLAW_DBF_TEXT(4,trace,"none2fre");
 		return 0;
@@ -1171,7 +1171,7 @@
         }
 #ifdef FUNCTRACE
 	printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"rlsexit");
         return 0;
@@ -1192,7 +1192,7 @@
 
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",dev->name,__func__);
         printk(KERN_INFO "claw: variable p_ch =\n");
         dumpit((char *) p_ch, sizeof(struct chbk));
 #endif
@@ -1200,20 +1200,20 @@
         if (p_ch->claw_state == CLAW_STOP) {
 #ifdef FUNCTRACE
 		printk(KERN_INFO "%s:%s Exit on line %d\n",
-			dev->name,__FUNCTION__,__LINE__);
+			dev->name,__func__,__LINE__);
 #endif
         	return;
         }
 #ifdef DEBUGMSG
         printk( KERN_INFO "%s:%s  state-%02x\n" ,
 		dev->name,
-		__FUNCTION__,
+		__func__,
 		p_ch->claw_state);
 #endif
 	claw_strt_out_IO( dev );
 #ifdef FUNCTRACE
 	printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"rtry_xit");
         return;
@@ -1235,7 +1235,7 @@
 	int	rc;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter  \n",p_ch->ndev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter  \n",p_ch->ndev->name,__func__);
         printk(KERN_INFO "%s: variable p_ch =\n",p_ch->ndev->name);
         dumpit((char *) p_ch, sizeof(struct chbk));
 #endif
@@ -1262,7 +1262,7 @@
 
 #ifdef FUNCTRACE
 	printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
         return;
 }      /* end of claw_write_next      */
@@ -1276,7 +1276,7 @@
 claw_timer ( struct chbk * p_ch )
 {
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Entry\n",p_ch->ndev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Entry\n",p_ch->ndev->name,__func__);
         printk(KERN_INFO "%s: variable p_ch =\n",p_ch->ndev->name);
         dumpit((char *) p_ch, sizeof(struct chbk));
 #endif
@@ -1285,7 +1285,7 @@
         wake_up(&p_ch->wait);
 #ifdef FUNCTRACE
 	printk(KERN_INFO "%s:%s Exit on line %d\n",
-		p_ch->ndev->name,__FUNCTION__,__LINE__);
+		p_ch->ndev->name,__func__,__LINE__);
 #endif
         return;
 }      /* end of claw_timer  */
@@ -1312,7 +1312,7 @@
 	int	order_of_mag=1;		/* assume 2 pages */
 	int	nump=2;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s Enter pages = %d \n",__FUNCTION__,num_of_pages);
+	printk(KERN_INFO "%s Enter pages = %d \n",__func__,num_of_pages);
 #endif
 	CLAW_DBF_TEXT_(5,trace,"pages%d",num_of_pages);
 	if (num_of_pages == 1)   {return 0; }  /* magnitude of 0 = 1 page */
@@ -1327,7 +1327,7 @@
 	if (order_of_mag > 9) { order_of_mag = 9; }  /* I know it's paranoid */
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s Exit on line %d, order = %d\n",
-	__FUNCTION__,__LINE__, order_of_mag);
+	__func__,__LINE__, order_of_mag);
 #endif
 	CLAW_DBF_TEXT_(5,trace,"mag%d",order_of_mag);
 	return order_of_mag;
@@ -1349,7 +1349,7 @@
         struct ccwbk*  p_buf;
 #endif
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter  \n",dev->name,__func__);
 #endif
 #ifdef DEBUGMSG
         printk(KERN_INFO "dev\n");
@@ -1369,7 +1369,7 @@
         if ( p_first==NULL) {
 #ifdef FUNCTRACE
 		printk(KERN_INFO "%s:%s Exit on line %d\n",
-			dev->name,__FUNCTION__,__LINE__);
+			dev->name,__func__,__LINE__);
 #endif
 		CLAW_DBF_TEXT(4,trace,"addexit");
                 return 0;
@@ -1400,9 +1400,9 @@
         if ( privptr-> p_read_active_first ==NULL ) {
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s:%s p_read_active_first == NULL \n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
                 printk(KERN_INFO "%s:%s Read active first/last changed \n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
 #endif
                 privptr-> p_read_active_first= p_first;  /*    set new first */
                 privptr-> p_read_active_last = p_last;   /*    set new last  */
@@ -1411,7 +1411,7 @@
 
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s:%s Read in progress \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
 #endif
                 /* set up TIC ccw  */
                 temp_ccw.cda= (__u32)__pa(&p_first->read);
@@ -1450,15 +1450,15 @@
                 privptr->p_read_active_last=p_last;
         } /* end of if ( privptr-> p_read_active_first ==NULL)  */
 #ifdef IOTRACE
-        printk(KERN_INFO "%s:%s  dump p_last CCW BK \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s  dump p_last CCW BK \n",dev->name,__func__);
         dumpit((char *)p_last, sizeof(struct ccwbk));
-        printk(KERN_INFO "%s:%s  dump p_end CCW BK \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s  dump p_end CCW BK \n",dev->name,__func__);
         dumpit((char *)p_end, sizeof(struct endccw));
 
-        printk(KERN_INFO "%s:%s dump p_first CCW BK \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s dump p_first CCW BK \n",dev->name,__func__);
         dumpit((char *)p_first, sizeof(struct ccwbk));
         printk(KERN_INFO "%s:%s Dump Active CCW chain \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
         p_buf=privptr->p_read_active_first;
         while (p_buf!=NULL) {
                 dumpit((char *)p_buf, sizeof(struct ccwbk));
@@ -1467,7 +1467,7 @@
 #endif
 #ifdef FUNCTRACE
 	printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"addexit");
         return 0;
@@ -1483,7 +1483,7 @@
 {
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s: %s() > enter  \n",
-		cdev->dev.bus_id,__FUNCTION__);
+		cdev->dev.bus_id,__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"ccwret");
 #ifdef DEBUGMSG
@@ -1516,7 +1516,7 @@
         }
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s: %s() > exit on line %d\n",
-		cdev->dev.bus_id,__FUNCTION__,__LINE__);
+		cdev->dev.bus_id,__func__,__LINE__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"ccwret");
 }    /*    end of ccw_check_return_code   */
@@ -1531,7 +1531,7 @@
 	struct net_device *dev = p_ch->ndev;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s: %s() > enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s: %s() > enter\n",dev->name,__func__);
 #endif
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: variable dev =\n",dev->name);
@@ -1578,7 +1578,7 @@
 
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s: %s() exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 }   /*    end of ccw_check_unit_check    */
 
@@ -1706,7 +1706,7 @@
 	int    rc=0;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s > enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s > enter  \n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"findlink");
 #ifdef DEBUGMSG
@@ -1739,7 +1739,7 @@
 
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
         return 0;
 }    /*    end of find_link    */
@@ -1773,7 +1773,7 @@
         struct ccwbk                   *p_buf;
 #endif
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s: %s() > enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s: %s() > enter\n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"hw_tx");
 #ifdef DEBUGMSG
@@ -1787,7 +1787,7 @@
         p_ch=(struct chbk *)&privptr->channel[WRITE];
 	p_env =privptr->p_env;
 #ifdef IOTRACE
-        printk(KERN_INFO "%s: %s() dump sk_buff  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s: %s() dump sk_buff  \n",dev->name,__func__);
         dumpit((char *)skb ,sizeof(struct sk_buff));
 #endif
 	claw_free_wrt_buf(dev);	/* Clean up free chain if posible */
@@ -1877,7 +1877,7 @@
         while (len_of_data > 0) {
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s: %s() length-of-data is %ld \n",
-			dev->name ,__FUNCTION__,len_of_data);
+			dev->name ,__func__,len_of_data);
                 dumpit((char *)pDataAddress ,64);
 #endif
                 p_this_ccw=privptr->p_write_free_chain;  /* get a block */
@@ -1913,7 +1913,7 @@
                 p_last_ccw=p_this_ccw;      /* save new last block */
 #ifdef IOTRACE
 		printk(KERN_INFO "%s: %s() > CCW and Buffer %ld bytes long \n",
-			dev->name,__FUNCTION__,bytesInThisBuffer);
+			dev->name,__func__,bytesInThisBuffer);
                 dumpit((char *)p_this_ccw, sizeof(struct ccwbk));
                 dumpit((char *)p_this_ccw->p_buffer, 64);
 #endif
@@ -1998,7 +1998,7 @@
 
 #ifdef IOTRACE
         printk(KERN_INFO "%s: %s() >  Dump Active CCW chain \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
         p_buf=privptr->p_write_active_first;
         while (p_buf!=NULL) {
                 dumpit((char *)p_buf, sizeof(struct ccwbk));
@@ -2018,7 +2018,7 @@
         /*      if write free count is zero , set NOBUFFER       */
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() > free_count is %d\n",
-		dev->name,__FUNCTION__,
+		dev->name,__func__,
 		(int) privptr->write_free_count );
 #endif
 	if (privptr->write_free_count==0) {
@@ -2029,7 +2029,7 @@
 Done:
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s: %s() > exit on line %d, rc = %d \n",
-		dev->name,__FUNCTION__,__LINE__, rc);
+		dev->name,__func__,__LINE__, rc);
 #endif
 	return(rc);
 }    /*    end of claw_hw_tx    */
@@ -2063,7 +2063,7 @@
         addr_t   real_TIC_address;
         int i,j;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s: %s() enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s: %s() enter  \n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"init_ccw");
 #ifdef DEBUGMSG
@@ -2097,15 +2097,15 @@
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() "
 		"ccw_blocks_required=%d\n",
-		dev->name,__FUNCTION__,
+		dev->name,__func__,
 		ccw_blocks_required);
         printk(KERN_INFO "%s: %s() "
 		"PAGE_SIZE=0x%x\n",
-		dev->name,__FUNCTION__,
+		dev->name,__func__,
 		(unsigned int)PAGE_SIZE);
         printk(KERN_INFO "%s: %s() > "
 		"PAGE_MASK=0x%x\n",
-		dev->name,__FUNCTION__,
+		dev->name,__func__,
 		(unsigned int)PAGE_MASK);
 #endif
         /*
@@ -2117,10 +2117,10 @@
 
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() > ccw_blocks_perpage=%d\n",
-		dev->name,__FUNCTION__,
+		dev->name,__func__,
 		ccw_blocks_perpage);
         printk(KERN_INFO "%s: %s() > ccw_pages_required=%d\n",
-		dev->name,__FUNCTION__,
+		dev->name,__func__,
 		ccw_pages_required);
 #endif
         /*
@@ -2156,29 +2156,29 @@
 #ifdef DEBUGMSG
         if (privptr->p_env->read_size < PAGE_SIZE) {
             printk(KERN_INFO "%s: %s() reads_perpage=%d\n",
-	    	dev->name,__FUNCTION__,
+		dev->name,__func__,
 		claw_reads_perpage);
         }
         else {
             printk(KERN_INFO "%s: %s() pages_perread=%d\n",
-	    	dev->name,__FUNCTION__,
+		dev->name,__func__,
 		privptr->p_buff_pages_perread);
         }
         printk(KERN_INFO "%s: %s() read_pages=%d\n",
-		dev->name,__FUNCTION__,
+		dev->name,__func__,
 		claw_read_pages);
         if (privptr->p_env->write_size < PAGE_SIZE) {
             printk(KERN_INFO "%s: %s() writes_perpage=%d\n",
-	    	dev->name,__FUNCTION__,
+		dev->name,__func__,
 		claw_writes_perpage);
         }
         else {
             printk(KERN_INFO "%s: %s() pages_perwrite=%d\n",
-	    	dev->name,__FUNCTION__,
+		dev->name,__func__,
 		privptr->p_buff_pages_perwrite);
         }
         printk(KERN_INFO "%s: %s() write_pages=%d\n",
-		dev->name,__FUNCTION__,
+		dev->name,__func__,
 		claw_write_pages);
 #endif
 
@@ -2194,12 +2194,12 @@
                         printk(KERN_INFO "%s: %s()  "
 				"__get_free_pages for CCWs failed : "
 				"pages is %d\n",
-                                dev->name,__FUNCTION__,
+				dev->name,__func__,
 				ccw_pages_required );
 #ifdef FUNCTRACE
                         printk(KERN_INFO "%s: %s() > "
 				"exit on line %d, rc = ENOMEM\n",
-				dev->name,__FUNCTION__,
+				dev->name,__func__,
 				 __LINE__);
 #endif
                         return -ENOMEM;
@@ -2218,7 +2218,7 @@
         /*                              Initialize ending CCW block       */
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() begin initialize ending CCW blocks\n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
 #endif
 
         p_endccw=privptr->p_end_ccw;
@@ -2276,7 +2276,7 @@
 
 #ifdef IOTRACE
         printk(KERN_INFO "%s: %s() dump claw ending CCW BK \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
         dumpit((char *)p_endccw, sizeof(struct endccw));
 #endif
 
@@ -2287,7 +2287,7 @@
 
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s()  Begin build a chain of CCW buffer \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
 #endif
         p_buff=privptr->p_buff_ccw;
 
@@ -2306,7 +2306,7 @@
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() "
 		"End build a chain of CCW buffer \n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
         p_buf=p_free_chain;
         while (p_buf!=NULL) {
                 dumpit((char *)p_buf, sizeof(struct ccwbk));
@@ -2321,7 +2321,7 @@
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() "
 		"Begin initialize ClawSignalBlock \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
 #endif
         if (privptr->p_claw_signal_blk==NULL) {
                 privptr->p_claw_signal_blk=p_free_chain;
@@ -2334,7 +2334,7 @@
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() >  End initialize "
 	 	"ClawSignalBlock\n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
         dumpit((char *)privptr->p_claw_signal_blk, sizeof(struct ccwbk));
 #endif
 
@@ -2349,14 +2349,14 @@
                 if (privptr->p_buff_write==NULL) {
                         printk(KERN_INFO "%s: %s() __get_free_pages for write"
 				" bufs failed : get is for %d pages\n",
-                                dev->name,__FUNCTION__,claw_write_pages );
+				dev->name,__func__,claw_write_pages );
                         free_pages((unsigned long)privptr->p_buff_ccw,
 			   (int)pages_to_order_of_mag(privptr->p_buff_ccw_num));
                         privptr->p_buff_ccw=NULL;
 #ifdef FUNCTRACE
                         printk(KERN_INFO "%s: %s() > exit on line %d,"
 			 	"rc = ENOMEM\n",
-				dev->name,__FUNCTION__,__LINE__);
+				dev->name,__func__,__LINE__);
 #endif
                         return -ENOMEM;
                 }
@@ -2369,7 +2369,7 @@
 			ccw_pages_required * PAGE_SIZE);
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s: %s() Begin build claw write free "
-			"chain \n",dev->name,__FUNCTION__);
+			"chain \n",dev->name,__func__);
 #endif
                 privptr->p_write_free_chain=NULL;
 
@@ -2409,14 +2409,14 @@
 #ifdef IOTRACE
                    printk(KERN_INFO "%s:%s __get_free_pages "
 		    "for writes buf: get for %d pages\n",
-		    dev->name,__FUNCTION__,
+		    dev->name,__func__,
 		    privptr->p_buff_pages_perwrite);
 #endif
                    if (p_buff==NULL) {
 			printk(KERN_INFO "%s:%s __get_free_pages "
 			 	"for writes buf failed : get is for %d pages\n",
 				dev->name,
-				__FUNCTION__,
+				__func__,
 				privptr->p_buff_pages_perwrite );
                         free_pages((unsigned long)privptr->p_buff_ccw,
 			      (int)pages_to_order_of_mag(
@@ -2433,7 +2433,7 @@
 #ifdef FUNCTRACE
                         printk(KERN_INFO "%s: %s exit on line %d, rc = ENOMEM\n",
 			dev->name,
-			__FUNCTION__,
+			__func__,
 			__LINE__);
 #endif
                         return -ENOMEM;
@@ -2466,7 +2466,7 @@
 
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s:%s  End build claw write free chain \n",
-	dev->name,__FUNCTION__);
+	dev->name,__func__);
         p_buf=privptr->p_write_free_chain;
         while (p_buf!=NULL) {
                 dumpit((char *)p_buf, sizeof(struct ccwbk));
@@ -2485,7 +2485,7 @@
                         printk(KERN_INFO "%s: %s() "
 			 	"__get_free_pages for read buf failed : "
 			 	"get is for %d pages\n",
-                                dev->name,__FUNCTION__,claw_read_pages );
+				dev->name,__func__,claw_read_pages );
                         free_pages((unsigned long)privptr->p_buff_ccw,
 				(int)pages_to_order_of_mag(
 					privptr->p_buff_ccw_num));
@@ -2497,7 +2497,7 @@
                         privptr->p_buff_write=NULL;
 #ifdef FUNCTRACE
                         printk(KERN_INFO "%s: %s() > exit on line %d, rc ="
-				" ENOMEM\n",dev->name,__FUNCTION__,__LINE__);
+				" ENOMEM\n",dev->name,__func__,__LINE__);
 #endif
                         return -ENOMEM;
                 }
@@ -2509,7 +2509,7 @@
                 */
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s: %s() Begin build claw read free chain \n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
 #endif
                 p_buff=privptr->p_buff_read;
                 for (i=0 ; i< privptr->p_env->read_buffers ; i++) {
@@ -2590,7 +2590,7 @@
 
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() Begin build claw read free chain \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
 #endif
                 for (i=0 ; i< privptr->p_env->read_buffers ; i++) {
                         p_buff = (void *)__get_free_pages(__GFP_DMA,
@@ -2598,7 +2598,7 @@
                         if (p_buff==NULL) {
                                 printk(KERN_INFO "%s: %s() __get_free_pages for read "
 					"buf failed : get is for %d pages\n",
-					dev->name,__FUNCTION__,
+					dev->name,__func__,
                                         privptr->p_buff_pages_perread );
                                 free_pages((unsigned long)privptr->p_buff_ccw,
 					(int)pages_to_order_of_mag(privptr->p_buff_ccw_num));
@@ -2622,7 +2622,7 @@
                                 privptr->p_buff_write=NULL;
 #ifdef FUNCTRACE
                                 printk(KERN_INFO "%s: %s() exit on line %d, rc = ENOMEM\n",
-					dev->name,__FUNCTION__,
+					dev->name,__func__,
 					__LINE__);
 #endif
                                 return -ENOMEM;
@@ -2695,7 +2695,7 @@
         }       /*  pBuffread = NULL */
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s: %s() >  End build claw read free chain \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
         p_buf=p_first_CCWB;
         while (p_buf!=NULL) {
                 dumpit((char *)p_buf, sizeof(struct ccwbk));
@@ -2707,7 +2707,7 @@
 	privptr->buffs_alloc = 1;
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s: %s() exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
         return 0;
 }    /*    end of init_ccw_bk */
@@ -2723,11 +2723,11 @@
 {
   struct claw_privbk *privptr;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s enter  \n",__FUNCTION__);
+	printk(KERN_INFO "%s enter  \n",__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"proberr");
 #ifdef DEBUGMSG
-        printk(KERN_INFO "%s variable cgdev =\n",__FUNCTION__);
+	printk(KERN_INFO "%s variable cgdev =\n",__func__);
         dumpit((char *) cgdev, sizeof(struct ccwgroup_device));
 #endif
         privptr=(struct claw_privbk *)cgdev->dev.driver_data;
@@ -2741,7 +2741,7 @@
         }
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s > exit on line %d\n",
-		 __FUNCTION__,__LINE__);
+		 __func__,__LINE__);
 #endif
 
         return;
@@ -2772,7 +2772,7 @@
         struct chbk *p_ch = NULL;
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s: %s() > enter  \n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"clw_cntl");
 #ifdef DEBUGMSG
@@ -2794,7 +2794,7 @@
 #ifdef FUNCTRACE
                 printk(KERN_INFO "%s: %s() > "
 			"exit on line %d, rc=0\n",
-			dev->name,__FUNCTION__,__LINE__);
+			dev->name,__func__,__LINE__);
 #endif
                 return 0;
         }
@@ -3057,7 +3057,7 @@
 
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s: %s() exit on line %d, rc = 0\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 
         return 0;
@@ -3080,7 +3080,7 @@
         struct sk_buff 			*skb;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s > enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s > enter  \n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"sndcntl");
 #ifdef DEBUGMSG
@@ -3143,10 +3143,10 @@
         skb = dev_alloc_skb(sizeof(struct clawctl));
         if (!skb) {
                 printk(  "%s:%s low on mem, returning...\n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
 #ifdef DEBUG
                 printk(KERN_INFO "%s:%s Exit, rc = ENOMEM\n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
 #endif
                 return -ENOMEM;
         }
@@ -3162,7 +3162,7 @@
         	claw_hw_tx(skb, dev, 0);
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 
         return 0;
@@ -3180,7 +3180,7 @@
         struct clawctl 	   *p_ctl;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter  \n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"snd_conn");
 #ifdef  DEBUGMSG
@@ -3193,7 +3193,7 @@
         if ( privptr->system_validate_comp==0x00 ) {
 #ifdef FUNCTRACE
                 printk(KERN_INFO "%s:%s Exit on line %d, rc = 1\n",
-			dev->name,__FUNCTION__,__LINE__);
+			dev->name,__func__,__LINE__);
 #endif
                 return rc;
         }
@@ -3209,7 +3209,7 @@
        			HOST_APPL_NAME, privptr->p_env->api_type);
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d, rc = %d\n",
-		dev->name,__FUNCTION__,__LINE__, rc);
+		dev->name,__func__,__LINE__, rc);
 #endif
         return rc;
 
@@ -3228,7 +3228,7 @@
         struct conncmd *  p_connect;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"snd_dsc");
 #ifdef  DEBUGMSG
@@ -3244,7 +3244,7 @@
                 p_connect->host_name, p_connect->WS_name);
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d, rc = %d\n",
-		dev->name,__FUNCTION__, __LINE__, rc);
+		dev->name,__func__, __LINE__, rc);
 #endif
         return rc;
 }     /*   end of claw_snd_disc    */
@@ -3265,7 +3265,7 @@
 
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Enter\n",
-		dev->name,__FUNCTION__);
+		dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"chkresp");
 #ifdef DEBUGMSG
@@ -3285,7 +3285,7 @@
 		p_env->adapter_name  );
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d, rc = %d\n",
-		dev->name,__FUNCTION__,__LINE__, rc);
+		dev->name,__func__,__LINE__, rc);
 #endif
         return rc;
 }     /*    end of claw_snd_sys_validate_rsp    */
@@ -3301,7 +3301,7 @@
         int rc;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"conn_req");
 #ifdef DEBUGMSG
@@ -3311,7 +3311,7 @@
         rc=claw_snd_conn_req(dev, 1);
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d, rc = %d\n",
-		dev->name,__FUNCTION__,__LINE__, rc);
+		dev->name,__func__,__LINE__, rc);
 #endif
         return rc;
 }    /*   end of claw_strt_conn_req   */
@@ -3327,13 +3327,13 @@
 {
         struct claw_privbk *privptr;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"stats");
         privptr = dev->priv;
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
         return &privptr->stats;
 }     /*   end of claw_stats   */
@@ -3366,7 +3366,7 @@
 	int     p=0;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s enter  \n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"unpkread");
         p_first_ccw=NULL;
@@ -3408,7 +3408,7 @@
                 if ((p_this_ccw->header.opcode & MORE_to_COME_FLAG)!=0) {
 #ifdef DEBUGMSG
                         printk(KERN_INFO "%s: %s > More_to_come is ON\n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
 #endif
                         mtc_this_frm=1;
                         if (p_this_ccw->header.length!=
@@ -3435,7 +3435,7 @@
 #ifdef DEBUGMSG
                         printk(KERN_INFO "%s:%s goto next "
 				"frame from MoretoComeSkip \n",
-				dev->name,__FUNCTION__);
+				dev->name,__func__);
 #endif
                         goto NextFrame;
                 }
@@ -3445,7 +3445,7 @@
 #ifdef DEBUGMSG
                         printk(KERN_INFO "%s:%s goto next "
 				"frame from claw_process_control \n",
-				dev->name,__FUNCTION__);
+				dev->name,__func__);
 #endif
 			CLAW_DBF_TEXT(4,trace,"UnpkCntl");
                         goto NextFrame;
@@ -3468,7 +3468,7 @@
                 if (privptr->mtc_logical_link<0) {
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s: %s mtc_logical_link < 0  \n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
 #endif
 
                 /*
@@ -3487,7 +3487,7 @@
                         printk(KERN_INFO "%s: %s > goto next "
 				"frame from MoretoComeSkip \n",
 				dev->name,
-				__FUNCTION__);
+				__func__);
                         printk(KERN_INFO "      bytes_to_mov %d > (MAX_ENVELOPE_"
 				"SIZE-privptr->mtc_offset %d)\n",
 				bytes_to_mov,(MAX_ENVELOPE_SIZE- privptr->mtc_offset));
@@ -3505,13 +3505,13 @@
 		}
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s: %s() received data \n",
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
 		if (p_env->packing == DO_PACKED)
 			dumpit((char *)p_packd+sizeof(struct clawph),32);
 		else
 	                dumpit((char *)p_this_ccw->p_buffer, 32);
 		printk(KERN_INFO "%s: %s() bytelength %d \n",
-			dev->name,__FUNCTION__,bytes_to_mov);
+			dev->name,__func__,bytes_to_mov);
 #endif
                 if (mtc_this_frm==0) {
                         len_of_data=privptr->mtc_offset+bytes_to_mov;
@@ -3530,13 +3530,13 @@
 #ifdef DEBUGMSG
                                 printk(KERN_INFO "%s: %s() netif_"
 					"rx(skb) completed \n",
-					dev->name,__FUNCTION__);
+					dev->name,__func__);
 #endif
                         }
                         else {
                                 privptr->stats.rx_dropped++;
                                 printk(KERN_WARNING "%s: %s() low on memory\n",
-				dev->name,__FUNCTION__);
+				dev->name,__func__);
                         }
                         privptr->mtc_offset=0;
                         privptr->mtc_logical_link=-1;
@@ -3575,10 +3575,10 @@
 
 #ifdef IOTRACE
         printk(KERN_INFO "%s:%s processed frame is %d \n",
-		dev->name,__FUNCTION__,i);
+		dev->name,__func__,i);
         printk(KERN_INFO "%s:%s  F:%lx L:%lx\n",
 		dev->name,
-		__FUNCTION__,
+		__func__,
 		(unsigned long)p_first_ccw,
 		(unsigned long)p_last_ccw);
 #endif
@@ -3588,7 +3588,7 @@
         claw_strt_read(dev, LOCK_YES);
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s: %s exit on line %d\n",
-		dev->name, __FUNCTION__, __LINE__);
+		dev->name, __func__, __LINE__);
 #endif
         return;
 }     /*  end of unpack_read   */
@@ -3610,7 +3610,7 @@
         p_ch=&privptr->channel[READ];
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter  \n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter  \n",dev->name,__func__);
         printk(KERN_INFO "%s: variable lock = %d, dev =\n",dev->name, lock);
         dumpit((char *) dev, sizeof(struct net_device));
 #endif
@@ -3626,7 +3626,7 @@
         }
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s:%s state-%02x\n" ,
-		dev->name,__FUNCTION__, p_ch->claw_state);
+		dev->name,__func__, p_ch->claw_state);
 #endif
         if (lock==LOCK_YES) {
                 spin_lock_irqsave(get_ccwdev_lock(p_ch->cdev), saveflags);
@@ -3634,7 +3634,7 @@
         if (test_and_set_bit(0, (void *)&p_ch->IO_active) == 0) {
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s: HOT READ started in %s\n" ,
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
                 p_clawh=(struct clawh *)privptr->p_claw_signal_blk;
                 dumpit((char *)&p_clawh->flag , 1);
 #endif
@@ -3650,7 +3650,7 @@
 	else {
 #ifdef DEBUGMSG
 		printk(KERN_INFO "%s: No READ started by %s() In progress\n" ,
-			dev->name,__FUNCTION__);
+			dev->name,__func__);
 #endif
 		CLAW_DBF_TEXT(2,trace,"ReadAct");
 	}
@@ -3660,7 +3660,7 @@
         }
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 	CLAW_DBF_TEXT(4,trace,"StRdExit");
         return;
@@ -3681,7 +3681,7 @@
         struct ccwbk   	*p_first_ccw;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",dev->name,__func__);
 #endif
 	if (!dev) {
 		return;
@@ -3691,7 +3691,7 @@
 
 #ifdef DEBUGMSG
         printk(KERN_INFO "%s:%s state-%02x\n" ,
-		dev->name,__FUNCTION__,p_ch->claw_state);
+		dev->name,__func__,p_ch->claw_state);
 #endif
         CLAW_DBF_TEXT(4,trace,"strt_io");
         p_first_ccw=privptr->p_write_active_first;
@@ -3701,14 +3701,14 @@
         if (p_first_ccw == NULL) {
 #ifdef FUNCTRACE
                 printk(KERN_INFO "%s:%s Exit on line %d\n",
-			dev->name,__FUNCTION__,__LINE__);
+			dev->name,__func__,__LINE__);
 #endif
                 return;
         }
         if (test_and_set_bit(0, (void *)&p_ch->IO_active) == 0) {
                 parm = (unsigned long) p_ch;
 #ifdef DEBUGMSG
-                printk(KERN_INFO "%s:%s do_io \n" ,dev->name,__FUNCTION__);
+		printk(KERN_INFO "%s:%s do_io \n" ,dev->name,__func__);
                 dumpit((char *)p_first_ccw, sizeof(struct ccwbk));
 #endif
 		CLAW_DBF_TEXT(2,trace,"StWrtIO");
@@ -3721,7 +3721,7 @@
         dev->trans_start = jiffies;
 #ifdef FUNCTRACE
 	printk(KERN_INFO "%s:%s Exit on line %d\n",
-		dev->name,__FUNCTION__,__LINE__);
+		dev->name,__func__,__LINE__);
 #endif
 
         return;
@@ -3745,7 +3745,7 @@
         struct ccwbk*p_buf;
 #endif
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",dev->name,__func__);
         printk(KERN_INFO "%s: free count = %d  variable dev =\n",
 		dev->name,privptr->write_free_count);
 #endif
@@ -3798,7 +3798,7 @@
                 privptr->p_write_active_last=NULL;
 #ifdef DEBUGMSG
                 printk(KERN_INFO "%s:%s p_write_"
-			"active_first==NULL\n",dev->name,__FUNCTION__);
+			"active_first==NULL\n",dev->name,__func__);
 #endif
         }
 #ifdef IOTRACE
@@ -3819,7 +3819,7 @@
 	CLAW_DBF_TEXT_(4,trace,"FWC=%d",privptr->write_free_count);
 #ifdef FUNCTRACE
         printk(KERN_INFO "%s:%s Exit on line %d free_count =%d\n",
-		dev->name,__FUNCTION__, __LINE__,privptr->write_free_count);
+		dev->name,__func__, __LINE__,privptr->write_free_count);
 #endif
         return;
 }
@@ -3833,7 +3833,7 @@
 {
 	struct claw_privbk *privptr;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"free_dev");
 
@@ -3854,7 +3854,7 @@
 #endif
 	CLAW_DBF_TEXT(2,setup,"feee_ok");
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Exit\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Exit\n",dev->name,__func__);
 #endif
 }
 
@@ -3867,13 +3867,13 @@
 claw_init_netdevice(struct net_device * dev)
 {
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"init_dev");
 	CLAW_DBF_TEXT_(2,setup,"%s",dev->name);
 	if (!dev) {
         printk(KERN_WARNING "claw:%s BAD Device exit line %d\n",
-		__FUNCTION__,__LINE__);
+		__func__,__LINE__);
 		CLAW_DBF_TEXT(2,setup,"baddev");
 		return;
 	}
@@ -3889,7 +3889,7 @@
 	dev->tx_queue_len = 1300;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Exit\n",dev->name,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Exit\n",dev->name,__func__);
 #endif
 	CLAW_DBF_TEXT(2,setup,"initok");
 	return;
@@ -3909,7 +3909,7 @@
 	struct ccw_dev_id dev_id;
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "%s:%s Enter\n",cdev->dev.bus_id,__FUNCTION__);
+	printk(KERN_INFO "%s:%s Enter\n",cdev->dev.bus_id,__func__);
 #endif
 	CLAW_DBF_TEXT_(2,setup,"%s",cdev->dev.bus_id);
 	privptr->channel[i].flag  = i+1;   /* Read is 1 Write is 2 */
@@ -3920,16 +3920,16 @@
 	p_ch->devno = dev_id.devno;
 	if ((p_ch->irb = kzalloc(sizeof (struct irb),GFP_KERNEL)) == NULL) {
 		printk(KERN_WARNING "%s Out of memory in %s for irb\n",
-			p_ch->id,__FUNCTION__);
+			p_ch->id,__func__);
 #ifdef FUNCTRACE
         	printk(KERN_INFO "%s:%s Exit on line %d\n",
-			p_ch->id,__FUNCTION__,__LINE__);
+			p_ch->id,__func__,__LINE__);
 #endif
 		return -ENOMEM;
 	}
 #ifdef FUNCTRACE
         	printk(KERN_INFO "%s:%s Exit on line %d\n",
-			cdev->dev.bus_id,__FUNCTION__,__LINE__);
+			cdev->dev.bus_id,__func__,__LINE__);
 #endif
 	return 0;
 }
@@ -3952,7 +3952,7 @@
 	int ret;
 	struct ccw_dev_id dev_id;
 
-	pr_debug("%s() called\n", __FUNCTION__);
+	pr_debug("%s() called\n", __func__);
 	printk(KERN_INFO "claw: add for %s\n",cgdev->cdev[READ]->dev.bus_id);
 	CLAW_DBF_TEXT(2,setup,"new_dev");
 	privptr = cgdev->dev.driver_data;
@@ -3990,7 +3990,7 @@
 	}
 	dev = alloc_netdev(0,"claw%d",claw_init_netdevice);
 	if (!dev) {
-		printk(KERN_WARNING "%s:alloc_netdev failed\n",__FUNCTION__);
+		printk(KERN_WARNING "%s:alloc_netdev failed\n",__func__);
 		goto out;
 	}
 	dev->priv = privptr;
@@ -4065,7 +4065,7 @@
 	struct net_device *ndev;
 	int	ret;
 
-	pr_debug("%s() called\n", __FUNCTION__);
+	pr_debug("%s() called\n", __func__);
 	CLAW_DBF_TEXT_(2,setup,"%s",cgdev->dev.bus_id);
 	priv = cgdev->dev.driver_data;
 	if (!priv)
@@ -4095,15 +4095,15 @@
 {
 	struct claw_privbk *priv;
 
-	pr_debug("%s() called\n", __FUNCTION__);
+	pr_debug("%s() called\n", __func__);
 	CLAW_DBF_TEXT_(2,setup,"%s",cgdev->dev.bus_id);
 	priv = cgdev->dev.driver_data;
 	if (!priv) {
-		printk(KERN_WARNING "claw: %s() no Priv exiting\n",__FUNCTION__);
+		printk(KERN_WARNING "claw: %s() no Priv exiting\n",__func__);
 		return;
 	}
 	printk(KERN_INFO "claw: %s() called %s will be removed.\n",
-			__FUNCTION__,cgdev->cdev[0]->dev.bus_id);
+			__func__,cgdev->cdev[0]->dev.bus_id);
 	if (cgdev->state == CCWGROUP_ONLINE)
 		claw_shutdown_device(cgdev);
 	claw_remove_files(&cgdev->dev);
@@ -4346,7 +4346,7 @@
 static int
 claw_add_files(struct device *dev)
 {
-	pr_debug("%s() called\n", __FUNCTION__);
+	pr_debug("%s() called\n", __func__);
 	CLAW_DBF_TEXT(2,setup,"add_file");
 	return sysfs_create_group(&dev->kobj, &claw_attr_group);
 }
@@ -4354,7 +4354,7 @@
 static void
 claw_remove_files(struct device *dev)
 {
-	pr_debug("%s() called\n", __FUNCTION__);
+	pr_debug("%s() called\n", __func__);
 	CLAW_DBF_TEXT(2,setup,"rem_file");
 	sysfs_remove_group(&dev->kobj, &claw_attr_group);
 }
@@ -4385,12 +4385,12 @@
 	printk(KERN_INFO "claw: starting driver\n");
 
 #ifdef FUNCTRACE
-        printk(KERN_INFO "claw: %s() enter \n",__FUNCTION__);
+	printk(KERN_INFO "claw: %s() enter \n",__func__);
 #endif
 	ret = claw_register_debug_facility();
 	if (ret) {
 		printk(KERN_WARNING "claw: %s() debug_register failed %d\n",
-			__FUNCTION__,ret);
+			__func__,ret);
 		return ret;
 	}
 	CLAW_DBF_TEXT(2,setup,"init_mod");
@@ -4398,10 +4398,10 @@
 	if (ret) {
 		claw_unregister_debug_facility();
 		printk(KERN_WARNING "claw; %s() cu3088 register failed %d\n",
-			__FUNCTION__,ret);
+			__func__,ret);
 	}
 #ifdef FUNCTRACE
-        printk(KERN_INFO "claw: %s() exit \n",__FUNCTION__);
+	printk(KERN_INFO "claw: %s() exit \n",__func__);
 #endif
 	return ret;
 }
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 874a199..8f876f6 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -670,7 +670,7 @@
 	struct netiucv_priv *privptr = netdev_priv(conn->netdev);
 	int rc;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 
 	if (!conn->netdev) {
 		iucv_message_reject(conn->path, msg);
@@ -718,7 +718,7 @@
 	struct ll_header header;
 	int rc;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 
 	if (conn && conn->netdev)
 		privptr = netdev_priv(conn->netdev);
@@ -799,7 +799,7 @@
 	struct netiucv_priv *privptr = netdev_priv(netdev);
 	int rc;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	conn->path = path;
 	path->msglim = NETIUCV_QUEUELEN_DEFAULT;
@@ -821,7 +821,7 @@
 	struct iucv_event *ev = arg;
 	struct iucv_path *path = ev->data;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	iucv_path_sever(path, NULL);
 }
 
@@ -831,7 +831,7 @@
 	struct net_device *netdev = conn->netdev;
 	struct netiucv_priv *privptr = netdev_priv(netdev);
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	fsm_deltimer(&conn->timer);
 	fsm_newstate(fi, CONN_STATE_IDLE);
 	netdev->tx_queue_len = conn->path->msglim;
@@ -842,7 +842,7 @@
 {
 	struct iucv_connection *conn = arg;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	fsm_deltimer(&conn->timer);
 	iucv_path_sever(conn->path, NULL);
 	fsm_newstate(fi, CONN_STATE_STARTWAIT);
@@ -854,7 +854,7 @@
 	struct net_device *netdev = conn->netdev;
 	struct netiucv_priv *privptr = netdev_priv(netdev);
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	fsm_deltimer(&conn->timer);
 	iucv_path_sever(conn->path, NULL);
@@ -870,7 +870,7 @@
 	struct iucv_connection *conn = arg;
 	int rc;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	fsm_newstate(fi, CONN_STATE_STARTWAIT);
 	PRINT_DEBUG("%s('%s'): connecting ...\n",
@@ -948,7 +948,7 @@
 	struct net_device *netdev = conn->netdev;
 	struct netiucv_priv *privptr = netdev_priv(netdev);
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	fsm_deltimer(&conn->timer);
 	fsm_newstate(fi, CONN_STATE_STOPPED);
@@ -1024,7 +1024,7 @@
 	struct net_device   *dev = arg;
 	struct netiucv_priv *privptr = netdev_priv(dev);
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	fsm_newstate(fi, DEV_STATE_STARTWAIT);
 	fsm_event(privptr->conn->fsm, CONN_EVENT_START, privptr->conn);
@@ -1044,7 +1044,7 @@
 	struct netiucv_priv *privptr = netdev_priv(dev);
 	struct iucv_event   ev;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	ev.conn = privptr->conn;
 
@@ -1066,7 +1066,7 @@
 	struct net_device   *dev = arg;
 	struct netiucv_priv *privptr = netdev_priv(dev);
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	switch (fsm_getstate(fi)) {
 		case DEV_STATE_STARTWAIT:
@@ -1097,7 +1097,7 @@
 static void
 dev_action_conndown(fsm_instance *fi, int event, void *arg)
 {
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	switch (fsm_getstate(fi)) {
 		case DEV_STATE_RUNNING:
@@ -1288,7 +1288,7 @@
 	struct netiucv_priv *privptr = netdev_priv(dev);
 	int rc;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 	/**
 	 * Some sanity checks ...
 	 */
@@ -1344,7 +1344,7 @@
 {
 	struct netiucv_priv *priv = netdev_priv(dev);
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return &priv->stats;
 }
 
@@ -1360,7 +1360,7 @@
  */
 static int netiucv_change_mtu(struct net_device * dev, int new_mtu)
 {
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	if (new_mtu < 576 || new_mtu > NETIUCV_MTU_MAX) {
 		IUCV_DBF_TEXT(setup, 2, "given MTU out of valid range\n");
 		return -EINVAL;
@@ -1378,7 +1378,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%s\n", netiucv_printname(priv->conn->userid));
 }
 
@@ -1393,7 +1393,7 @@
 	int 	i;
 	struct iucv_connection *cp;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	if (count > 9) {
 		PRINT_WARN("netiucv: username too long (%d)!\n", (int) count);
 		IUCV_DBF_TEXT_(setup, 2,
@@ -1449,7 +1449,7 @@
 			    char *buf)
 {	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%d\n", priv->conn->max_buffsize);
 }
 
@@ -1461,7 +1461,7 @@
 	char         *e;
 	int          bs1;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	if (count >= 39)
 		return -EINVAL;
 
@@ -1513,7 +1513,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%s\n", fsm_getstate_str(priv->fsm));
 }
 
@@ -1524,7 +1524,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%s\n", fsm_getstate_str(priv->conn->fsm));
 }
 
@@ -1535,7 +1535,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%ld\n", priv->conn->prof.maxmulti);
 }
 
@@ -1545,7 +1545,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 	priv->conn->prof.maxmulti = 0;
 	return count;
 }
@@ -1557,7 +1557,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%ld\n", priv->conn->prof.maxcqueue);
 }
 
@@ -1566,7 +1566,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 	priv->conn->prof.maxcqueue = 0;
 	return count;
 }
@@ -1578,7 +1578,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%ld\n", priv->conn->prof.doios_single);
 }
 
@@ -1587,7 +1587,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 	priv->conn->prof.doios_single = 0;
 	return count;
 }
@@ -1599,7 +1599,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%ld\n", priv->conn->prof.doios_multi);
 }
 
@@ -1608,7 +1608,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	priv->conn->prof.doios_multi = 0;
 	return count;
 }
@@ -1620,7 +1620,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%ld\n", priv->conn->prof.txlen);
 }
 
@@ -1629,7 +1629,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 	priv->conn->prof.txlen = 0;
 	return count;
 }
@@ -1641,7 +1641,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%ld\n", priv->conn->prof.tx_time);
 }
 
@@ -1650,7 +1650,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 	priv->conn->prof.tx_time = 0;
 	return count;
 }
@@ -1662,7 +1662,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%ld\n", priv->conn->prof.tx_pending);
 }
 
@@ -1671,7 +1671,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 	priv->conn->prof.tx_pending = 0;
 	return count;
 }
@@ -1683,7 +1683,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 5, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 5, __func__);
 	return sprintf(buf, "%ld\n", priv->conn->prof.tx_max_pending);
 }
 
@@ -1692,7 +1692,7 @@
 {
 	struct netiucv_priv *priv = dev->driver_data;
 
-	IUCV_DBF_TEXT(trace, 4, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 4, __func__);
 	priv->conn->prof.tx_max_pending = 0;
 	return count;
 }
@@ -1732,7 +1732,7 @@
 {
 	int ret;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	ret = sysfs_create_group(&dev->kobj, &netiucv_attr_group);
 	if (ret)
 		return ret;
@@ -1744,7 +1744,7 @@
 
 static void netiucv_remove_files(struct device *dev)
 {
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	sysfs_remove_group(&dev->kobj, &netiucv_stat_attr_group);
 	sysfs_remove_group(&dev->kobj, &netiucv_attr_group);
 }
@@ -1756,7 +1756,7 @@
 	int ret;
 
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	if (dev) {
 		snprintf(dev->bus_id, BUS_ID_SIZE, "net%s", ndev->name);
@@ -1792,7 +1792,7 @@
 
 static void netiucv_unregister_device(struct device *dev)
 {
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	netiucv_remove_files(dev);
 	device_unregister(dev);
 }
@@ -1857,7 +1857,7 @@
  */
 static void netiucv_remove_connection(struct iucv_connection *conn)
 {
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	write_lock_bh(&iucv_connection_rwlock);
 	list_del_init(&conn->list);
 	write_unlock_bh(&iucv_connection_rwlock);
@@ -1881,7 +1881,7 @@
 {
 	struct netiucv_priv *privptr = netdev_priv(dev);
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
 	if (!dev)
 		return;
@@ -1963,7 +1963,7 @@
 	struct netiucv_priv *priv;
 	struct iucv_connection *cp;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	if (count>9) {
 		PRINT_WARN("netiucv: username too long (%d)!\n", (int)count);
 		IUCV_DBF_TEXT(setup, 2, "conn_write: too long\n");
@@ -2048,7 +2048,7 @@
 	const char *p;
         int i;
 
-        IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 
         if (count >= IFNAMSIZ)
                 count = IFNAMSIZ - 1;;
@@ -2116,7 +2116,7 @@
 	struct netiucv_priv *priv;
 	struct device *dev;
 
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	while (!list_empty(&iucv_connection_list)) {
 		cp = list_entry(iucv_connection_list.next,
 				struct iucv_connection, list);
@@ -2146,8 +2146,7 @@
 	rc = iucv_register(&netiucv_handler, 1);
 	if (rc)
 		goto out_dbf;
-	IUCV_DBF_TEXT(trace, 3, __FUNCTION__);
-	netiucv_driver.groups = netiucv_drv_attr_groups;
+	IUCV_DBF_TEXT(trace, 3, __func__);
 	rc = driver_register(&netiucv_driver);
 	if (rc) {
 		PRINT_ERR("NETIUCV: failed to register driver.\n");
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 644a06e..4d4b542 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -59,15 +59,15 @@
 
 			printk(KERN_WARNING"%s: Code does not support more "
 			       "than two chained crws; please report to "
-			       "linux390@de.ibm.com!\n", __FUNCTION__);
+			       "linux390@de.ibm.com!\n", __func__);
 			ccode = stcrw(&tmp_crw);
 			printk(KERN_WARNING"%s: crw reports slct=%d, oflw=%d, "
 			       "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
-			       __FUNCTION__, tmp_crw.slct, tmp_crw.oflw,
+			       __func__, tmp_crw.slct, tmp_crw.oflw,
 			       tmp_crw.chn, tmp_crw.rsc, tmp_crw.anc,
 			       tmp_crw.erc, tmp_crw.rsid);
 			printk(KERN_WARNING"%s: This was crw number %x in the "
-			       "chain\n", __FUNCTION__, chain);
+			       "chain\n", __func__, chain);
 			if (ccode != 0)
 				break;
 			chain = tmp_crw.chn ? chain + 1 : 0;
@@ -83,7 +83,7 @@
 		       crw[chain].rsid);
 		/* Check for overflows. */
 		if (crw[chain].oflw) {
-			pr_debug("%s: crw overflow detected!\n", __FUNCTION__);
+			pr_debug("%s: crw overflow detected!\n", __func__);
 			css_schedule_eval_all();
 			chain = 0;
 			continue;
diff --git a/drivers/s390/s390mach.h b/drivers/s390/s390mach.h
index d3ca428..ca681f9 100644
--- a/drivers/s390/s390mach.h
+++ b/drivers/s390/s390mach.h
@@ -105,4 +105,8 @@
 #define ED_ETR_SYNC	12	/* External damage ETR sync check */
 #define ED_ETR_SWITCH	13	/* External damage ETR switch to local */
 
+struct pt_regs;
+
+void s390_handle_mcck(void);
+void s390_do_machine_check(struct pt_regs *regs);
 #endif /* __s390mach */
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 9e9f6c1..45a7cd9 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -539,7 +539,7 @@
 
 /* logging routine for zfcp */
 #define _ZFCP_LOG(fmt, args...) \
-	printk(KERN_ERR ZFCP_NAME": %s(%d): " fmt, __FUNCTION__, \
+	printk(KERN_ERR ZFCP_NAME": %s(%d): " fmt, __func__, \
 	       __LINE__ , ##args)
 
 #define ZFCP_LOG(level, fmt, args...) \
diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c
index 291ff62..c3e4ab0 100644
--- a/drivers/s390/sysinfo.c
+++ b/drivers/s390/sysinfo.c
@@ -11,111 +11,13 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
 
 /* Sigh, math-emu. Don't ask. */
 #include <asm/sfp-util.h>
 #include <math-emu/soft-fp.h>
 #include <math-emu/single.h>
 
-struct sysinfo_1_1_1 {
-	char reserved_0[32];
-	char manufacturer[16];
-	char type[4];
-	char reserved_1[12];
-	char model_capacity[16];
-	char sequence[16];
-	char plant[4];
-	char model[16];
-};
-
-struct sysinfo_1_2_1 {
-	char reserved_0[80];
-	char sequence[16];
-	char plant[4];
-	char reserved_1[2];
-	unsigned short cpu_address;
-};
-
-struct sysinfo_1_2_2 {
-	char format;
-	char reserved_0[1];
-	unsigned short acc_offset;
-	char reserved_1[24];
-	unsigned int secondary_capability;
-	unsigned int capability;
-	unsigned short cpus_total;
-	unsigned short cpus_configured;
-	unsigned short cpus_standby;
-	unsigned short cpus_reserved;
-	unsigned short adjustment[0];
-};
-
-struct sysinfo_1_2_2_extension {
-	unsigned int alt_capability;
-	unsigned short alt_adjustment[0];
-};
-
-struct sysinfo_2_2_1 {
-	char reserved_0[80];
-	char sequence[16];
-	char plant[4];
-	unsigned short cpu_id;
-	unsigned short cpu_address;
-};
-
-struct sysinfo_2_2_2 {
-	char reserved_0[32];
-	unsigned short lpar_number;
-	char reserved_1;
-	unsigned char characteristics;
-	unsigned short cpus_total;
-	unsigned short cpus_configured;
-	unsigned short cpus_standby;
-	unsigned short cpus_reserved;
-	char name[8];
-	unsigned int caf;
-	char reserved_2[16];
-	unsigned short cpus_dedicated;
-	unsigned short cpus_shared;
-};
-
-#define LPAR_CHAR_DEDICATED	(1 << 7)
-#define LPAR_CHAR_SHARED	(1 << 6)
-#define LPAR_CHAR_LIMITED	(1 << 5)
-
-struct sysinfo_3_2_2 {
-	char reserved_0[31];
-	unsigned char count;
-	struct {
-		char reserved_0[4];
-		unsigned short cpus_total;
-		unsigned short cpus_configured;
-		unsigned short cpus_standby;
-		unsigned short cpus_reserved;
-		char name[8];
-		unsigned int caf;
-		char cpi[16];
-		char reserved_1[24];
-
-	} vm[8];
-};
-
-static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
-{
-	register int r0 asm("0") = (fc << 28) | sel1;
-	register int r1 asm("1") = sel2;
-
-	asm volatile(
-		"   stsi 0(%2)\n"
-		"0: jz   2f\n"
-		"1: lhi  %0,%3\n"
-		"2:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
-		: "cc", "memory" );
-	return r0;
-}
-
 static inline int stsi_0(void)
 {
 	int rc = stsi (NULL, 0, 0, 0);
@@ -133,6 +35,8 @@
 	EBCASC(info->sequence, sizeof(info->sequence));
 	EBCASC(info->plant, sizeof(info->plant));
 	EBCASC(info->model_capacity, sizeof(info->model_capacity));
+	EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
+	EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
 	len += sprintf(page + len, "Manufacturer:         %-16.16s\n",
 		       info->manufacturer);
 	len += sprintf(page + len, "Type:                 %-4.4s\n",
@@ -155,8 +59,18 @@
 		       info->sequence);
 	len += sprintf(page + len, "Plant:                %-4.4s\n",
 		       info->plant);
-	len += sprintf(page + len, "Model Capacity:       %-16.16s\n",
-		       info->model_capacity);
+	len += sprintf(page + len, "Model Capacity:       %-16.16s %08u\n",
+		       info->model_capacity, *(u32 *) info->model_cap_rating);
+	if (info->model_perm_cap[0] != '\0')
+		len += sprintf(page + len,
+			       "Model Perm. Capacity: %-16.16s %08u\n",
+			       info->model_perm_cap,
+			       *(u32 *) info->model_perm_cap_rating);
+	if (info->model_temp_cap[0] != '\0')
+		len += sprintf(page + len,
+			       "Model Temp. Capacity: %-16.16s %08u\n",
+			       info->model_temp_cap,
+			       *(u32 *) info->model_temp_cap_rating);
 	return len;
 }
 
diff --git a/include/asm-alpha/semaphore.h b/include/asm-alpha/semaphore.h
index f1e9278..d9b2034 100644
--- a/include/asm-alpha/semaphore.h
+++ b/include/asm-alpha/semaphore.h
@@ -1,149 +1 @@
-#ifndef _ALPHA_SEMAPHORE_H
-#define _ALPHA_SEMAPHORE_H
-
-/*
- * SMP- and interrupt-safe semaphores..
- *
- * (C) Copyright 1996 Linus Torvalds
- * (C) Copyright 1996, 2000 Richard Henderson
- */
-
-#include <asm/current.h>
-#include <asm/system.h>
-#include <asm/atomic.h>
-#include <linux/compiler.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	atomic_t count;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)			\
-{								\
-	.count	= ATOMIC_INIT(n),				\
-  	.wait	= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait),	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count)		\
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name)		__DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init(struct semaphore *sem, int val)
-{
-	/*
-	 * Logically, 
-	 *   *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
-	 * except that gcc produces better initializing by parts yet.
-	 */
-
-	atomic_set(&sem->count, val);
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void down(struct semaphore *);
-extern void __down_failed(struct semaphore *);
-extern int  down_interruptible(struct semaphore *);
-extern int  __down_failed_interruptible(struct semaphore *);
-extern int  down_trylock(struct semaphore *);
-extern void up(struct semaphore *);
-extern void __up_wakeup(struct semaphore *);
-
-/*
- * Hidden out of line code is fun, but extremely messy.  Rely on newer
- * compilers to do a respectable job with this.  The contention cases
- * are handled out of line in arch/alpha/kernel/semaphore.c.
- */
-
-static inline void __down(struct semaphore *sem)
-{
-	long count;
-	might_sleep();
-	count = atomic_dec_return(&sem->count);
-	if (unlikely(count < 0))
-		__down_failed(sem);
-}
-
-static inline int __down_interruptible(struct semaphore *sem)
-{
-	long count;
-	might_sleep();
-	count = atomic_dec_return(&sem->count);
-	if (unlikely(count < 0))
-		return __down_failed_interruptible(sem);
-	return 0;
-}
-
-/*
- * down_trylock returns 0 on success, 1 if we failed to get the lock.
- */
-
-static inline int __down_trylock(struct semaphore *sem)
-{
-	long ret;
-
-	/* "Equivalent" C:
-
-	   do {
-		ret = ldl_l;
-		--ret;
-		if (ret < 0)
-			break;
-		ret = stl_c = ret;
-	   } while (ret == 0);
-	*/
-	__asm__ __volatile__(
-		"1:	ldl_l	%0,%1\n"
-		"	subl	%0,1,%0\n"
-		"	blt	%0,2f\n"
-		"	stl_c	%0,%1\n"
-		"	beq	%0,3f\n"
-		"	mb\n"
-		"2:\n"
-		".subsection 2\n"
-		"3:	br	1b\n"
-		".previous"
-		: "=&r" (ret), "=m" (sem->count)
-		: "m" (sem->count));
-
-	return ret < 0;
-}
-
-static inline void __up(struct semaphore *sem)
-{
-	if (unlikely(atomic_inc_return(&sem->count) <= 0))
-		__up_wakeup(sem);
-}
-
-#if !defined(CONFIG_DEBUG_SEMAPHORE)
-extern inline void down(struct semaphore *sem)
-{
-	__down(sem);
-}
-extern inline int down_interruptible(struct semaphore *sem)
-{
-	return __down_interruptible(sem);
-}
-extern inline int down_trylock(struct semaphore *sem)
-{
-	return __down_trylock(sem);
-}
-extern inline void up(struct semaphore *sem)
-{
-	__up(sem);
-}
-#endif
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-arm/semaphore-helper.h b/include/asm-arm/semaphore-helper.h
deleted file mode 100644
index 1d7f198..0000000
--- a/include/asm-arm/semaphore-helper.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef ASMARM_SEMAPHORE_HELPER_H
-#define ASMARM_SEMAPHORE_HELPER_H
-
-/*
- * These two _must_ execute atomically wrt each other.
- */
-static inline void wake_one_more(struct semaphore * sem)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (atomic_read(&sem->count) <= 0)
-		sem->waking++;
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-}
-
-static inline int waking_non_zero(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->waking > 0) {
-		sem->waking--;
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking non zero interruptible
- *	1	got the lock
- *	0	go to sleep
- *	-EINTR	interrupted
- *
- * We must undo the sem->count down_interruptible() increment while we are
- * protected by the spinlock in order to make this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-static inline int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->waking > 0) {
-		sem->waking--;
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;	
-}
-
-/*
- * waking_non_zero_try_lock:
- *	1	failed to lock
- *	0	got the lock
- *
- * We must undo the sem->count down_interruptible() increment while we are
- * protected by the spinlock in order to make this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-static inline int waking_non_zero_trylock(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 1;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->waking <= 0)
-		atomic_inc(&sem->count);
-	else {
-		sem->waking--;
-		ret = 0;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-#endif
diff --git a/include/asm-arm/semaphore.h b/include/asm-arm/semaphore.h
index 1c8b441..d9b2034 100644
--- a/include/asm-arm/semaphore.h
+++ b/include/asm-arm/semaphore.h
@@ -1,98 +1 @@
-/*
- * linux/include/asm-arm/semaphore.h
- */
-#ifndef __ASM_ARM_SEMAPHORE_H
-#define __ASM_ARM_SEMAPHORE_H
-
-#include <linux/linkage.h>
-#include <linux/spinlock.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-#include <asm/atomic.h>
-#include <asm/locks.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INIT(name, cnt)				\
-{								\
-	.count	= ATOMIC_INIT(cnt),				\
-	.wait	= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait),	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count)	\
-	struct semaphore name = __SEMAPHORE_INIT(name,count)
-
-#define DECLARE_MUTEX(name)		__DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init(struct semaphore *sem, int val)
-{
-	atomic_set(&sem->count, val);
-	sem->sleepers = 0;
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX(struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED(struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-/*
- * special register calling convention
- */
-asmlinkage void __down_failed(void);
-asmlinkage int  __down_interruptible_failed(void);
-asmlinkage int  __down_trylock_failed(void);
-asmlinkage void __up_wakeup(void);
-
-extern void __down(struct semaphore * sem);
-extern int  __down_interruptible(struct semaphore * sem);
-extern int  __down_trylock(struct semaphore * sem);
-extern void __up(struct semaphore * sem);
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "__down" is the actual routine that waits...
- */
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-	__down_op(sem, __down_failed);
-}
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "__down_interruptible" is the actual routine that waits...
- */
-static inline int down_interruptible (struct semaphore * sem)
-{
-	might_sleep();
-	return __down_op_ret(sem, __down_interruptible_failed);
-}
-
-static inline int down_trylock(struct semaphore *sem)
-{
-	return __down_op_ret(sem, __down_trylock_failed);
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore * sem)
-{
-	__up_op(sem, __up_wakeup);
-}
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-avr32/semaphore.h b/include/asm-avr32/semaphore.h
index feaf1d4..d9b2034 100644
--- a/include/asm-avr32/semaphore.h
+++ b/include/asm-avr32/semaphore.h
@@ -1,108 +1 @@
-/*
- * SMP- and interrupt-safe semaphores.
- *
- * Copyright (C) 2006 Atmel Corporation
- *
- * Based on include/asm-i386/semaphore.h
- *   Copyright (C) 1996 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __ASM_AVR32_SEMAPHORE_H
-#define __ASM_AVR32_SEMAPHORE_H
-
-#include <linux/linkage.h>
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	atomic_set(&sem->count, val);
-	sem->sleepers = 0;
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-void __down(struct semaphore * sem);
-int  __down_interruptible(struct semaphore * sem);
-void __up(struct semaphore * sem);
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "__down_failed" is a special asm handler that calls the C
- * routine that actually waits. See arch/i386/kernel/semaphore.c
- */
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-	if (unlikely(atomic_dec_return (&sem->count) < 0))
-		__down (sem);
-}
-
-/*
- * Interruptible try to acquire a semaphore.  If we obtained
- * it, return zero.  If we were interrupted, returns -EINTR
- */
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-
-	might_sleep();
-	if (unlikely(atomic_dec_return (&sem->count) < 0))
-		ret = __down_interruptible (sem);
-	return ret;
-}
-
-/*
- * Non-blockingly attempt to down() a semaphore.
- * Returns zero if we acquired it
- */
-static inline int down_trylock(struct semaphore * sem)
-{
-	return atomic_dec_if_positive(&sem->count) < 0;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore * sem)
-{
-	if (unlikely(atomic_inc_return (&sem->count) <= 0))
-		__up (sem);
-}
-
-#endif /*__ASM_AVR32_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/asm-blackfin/semaphore-helper.h b/include/asm-blackfin/semaphore-helper.h
deleted file mode 100644
index 9082b0d..0000000
--- a/include/asm-blackfin/semaphore-helper.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* Based on M68K version,	Lineo Inc.	May 2001 */
-
-#ifndef _BFIN_SEMAPHORE_HELPER_H
-#define _BFIN_SEMAPHORE_HELPER_H
-
-/*
- * SMP- and interrupt-safe semaphores helper functions.
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- */
-
-#include <asm/errno.h>
-
-/*
- * These two _must_ execute atomically wrt each other.
- */
-static inline void wake_one_more(struct semaphore *sem)
-{
-	atomic_inc(&sem->waking);
-}
-
-static inline int waking_non_zero(struct semaphore *sem)
-{
-	int ret;
-	unsigned long flags = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 0;
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_interruptible:
- *	1	got the lock
- *	0	go to sleep
- *	-EINTR	interrupted
- */
-static inline int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	int ret = 0;
-	unsigned long flags = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_trylock:
- *	1	failed to lock
- *	0	got the lock
- */
-static inline int waking_non_zero_trylock(struct semaphore *sem)
-{
-	int ret = 1;
-	unsigned long flags = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 0;
-	} else
-		atomic_inc(&sem->count);
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-#endif				/* _BFIN_SEMAPHORE_HELPER_H */
diff --git a/include/asm-blackfin/semaphore.h b/include/asm-blackfin/semaphore.h
index 533f90f..d9b2034 100644
--- a/include/asm-blackfin/semaphore.h
+++ b/include/asm-blackfin/semaphore.h
@@ -1,105 +1 @@
-#ifndef _BFIN_SEMAPHORE_H
-#define _BFIN_SEMAPHORE_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/linkage.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-#include <asm/atomic.h>
-
-/*
- * Interrupt-safe semaphores..
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * BFIN version by akbar hussain Lineo Inc  April 2001
- *
- */
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init(struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER(*sem, val);
-}
-
-static inline void init_MUTEX(struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED(struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-asmlinkage void __down(struct semaphore *sem);
-asmlinkage int __down_interruptible(struct semaphore *sem);
-asmlinkage int __down_trylock(struct semaphore *sem);
-asmlinkage void __up(struct semaphore *sem);
-
-extern spinlock_t semaphore_wake_lock;
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "down_failed" is a special asm handler that calls the C
- * routine that actually waits.
- */
-static inline void down(struct semaphore *sem)
-{
-	might_sleep();
-	if (atomic_dec_return(&sem->count) < 0)
-		__down(sem);
-}
-
-static inline int down_interruptible(struct semaphore *sem)
-{
-	int ret = 0;
-
-	might_sleep();
-	if (atomic_dec_return(&sem->count) < 0)
-		ret = __down_interruptible(sem);
-	return (ret);
-}
-
-static inline int down_trylock(struct semaphore *sem)
-{
-	int ret = 0;
-
-	if (atomic_dec_return(&sem->count) < 0)
-		ret = __down_trylock(sem);
-	return ret;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore *sem)
-{
-	if (atomic_inc_return(&sem->count) <= 0)
-		__up(sem);
-}
-
-#endif				/* __ASSEMBLY__ */
-#endif				/* _BFIN_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/asm-cris/semaphore-helper.h b/include/asm-cris/semaphore-helper.h
deleted file mode 100644
index 27bfeca1..0000000
--- a/include/asm-cris/semaphore-helper.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* $Id: semaphore-helper.h,v 1.3 2001/03/26 15:00:33 orjanf Exp $
- *
- * SMP- and interrupt-safe semaphores helper functions. Generic versions, no
- * optimizations whatsoever... 
- *
- */
-
-#ifndef _ASM_SEMAPHORE_HELPER_H
-#define _ASM_SEMAPHORE_HELPER_H
-
-#include <asm/atomic.h>
-#include <linux/errno.h>
-
-#define read(a) ((a)->counter)
-#define inc(a) (((a)->counter)++)
-#define dec(a) (((a)->counter)--)
-
-#define count_inc(a) ((*(a))++)
-
-/*
- * These two _must_ execute atomically wrt each other.
- */
-static inline void wake_one_more(struct semaphore * sem)
-{
-	atomic_inc(&sem->waking);
-}
-
-static inline int waking_non_zero(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	local_irq_save(flags);
-	if (read(&sem->waking) > 0) {
-		dec(&sem->waking);
-		ret = 1;
-	}
-	local_irq_restore(flags);
-	return ret;
-}
-
-static inline int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	int ret = 0;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	if (read(&sem->waking) > 0) {
-		dec(&sem->waking);
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		inc(&sem->count);
-		ret = -EINTR;
-	}
-	local_irq_restore(flags);
-	return ret;
-}
-
-static inline int waking_non_zero_trylock(struct semaphore *sem)
-{
-        int ret = 1;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	if (read(&sem->waking) <= 0)
-		inc(&sem->count);
-	else {
-		dec(&sem->waking);
-		ret = 0;
-	}
-	local_irq_restore(flags);
-	return ret;
-}
-
-#endif /* _ASM_SEMAPHORE_HELPER_H */
-
-
diff --git a/include/asm-cris/semaphore.h b/include/asm-cris/semaphore.h
index 31a4ac4..d9b2034 100644
--- a/include/asm-cris/semaphore.h
+++ b/include/asm-cris/semaphore.h
@@ -1,133 +1 @@
-/* $Id: semaphore.h,v 1.3 2001/05/08 13:54:09 bjornw Exp $ */
-
-/* On the i386 these are coded in asm, perhaps we should as well. Later.. */
-
-#ifndef _CRIS_SEMAPHORE_H
-#define _CRIS_SEMAPHORE_H
-
-#define RW_LOCK_BIAS             0x01000000
-
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-
-/*
- * CRIS semaphores, implemented in C-only so far. 
- */
-
-struct semaphore {
-	atomic_t count;
-	atomic_t waking;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.waking		= ATOMIC_INIT(0),				\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)    \
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-        struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init(struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-        sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-        sema_init(sem, 0);
-}
-
-extern void __down(struct semaphore * sem);
-extern int __down_interruptible(struct semaphore * sem);
-extern int __down_trylock(struct semaphore * sem);
-extern void __up(struct semaphore * sem);
-
-/* notice - we probably can do cli/sti here instead of saving */
-
-static inline void down(struct semaphore * sem)
-{
-	unsigned long flags;
-	int failed;
-
-	might_sleep();
-
-	/* atomically decrement the semaphores count, and if its negative, we wait */
-	cris_atomic_save(sem, flags);
-	failed = --(sem->count.counter) < 0;
-	cris_atomic_restore(sem, flags);
-	if(failed) {
-		__down(sem);
-	}
-}
-
-/*
- * This version waits in interruptible state so that the waiting
- * process can be killed.  The down_interruptible routine
- * returns negative for signalled and zero for semaphore acquired.
- */
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	unsigned long flags;
-	int failed;
-
-	might_sleep();
-
-	/* atomically decrement the semaphores count, and if its negative, we wait */
-	cris_atomic_save(sem, flags);
-	failed = --(sem->count.counter) < 0;
-	cris_atomic_restore(sem, flags);
-	if(failed)
-		failed = __down_interruptible(sem);
-	return(failed);
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	unsigned long flags;
-	int failed;
-
-	cris_atomic_save(sem, flags);
-	failed = --(sem->count.counter) < 0;
-	cris_atomic_restore(sem, flags);
-	if(failed)
-		failed = __down_trylock(sem);
-	return(failed);
-
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore * sem)
-{  
-	unsigned long flags;
-	int wakeup;
-
-	/* atomically increment the semaphores count, and if it was negative, we wake people */
-	cris_atomic_save(sem, flags);
-	wakeup = ++(sem->count.counter) <= 0;
-	cris_atomic_restore(sem, flags);
-	if(wakeup) {
-		__up(sem);
-	}
-}
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-frv/semaphore.h b/include/asm-frv/semaphore.h
index d7aaa19..d9b2034 100644
--- a/include/asm-frv/semaphore.h
+++ b/include/asm-frv/semaphore.h
@@ -1,155 +1 @@
-/* semaphore.h: semaphores for the FR-V
- *
- * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _ASM_SEMAPHORE_H
-#define _ASM_SEMAPHORE_H
-
-#define RW_LOCK_BIAS		 0x01000000
-
-#ifndef __ASSEMBLY__
-
-#include <linux/linkage.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-
-/*
- * the semaphore definition
- * - if counter is >0 then there are tokens available on the semaphore for down to collect
- * - if counter is <=0 then there are no spare tokens, and anyone that wants one must wait
- * - if wait_list is not empty, then there are processes waiting for the semaphore
- */
-struct semaphore {
-	unsigned		counter;
-	spinlock_t		wait_lock;
-	struct list_head	wait_list;
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	unsigned		__magic;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-# define __SEM_DEBUG_INIT(name) , (long)&(name).__magic
-#else
-# define __SEM_DEBUG_INIT(name)
-#endif
-
-
-#define __SEMAPHORE_INITIALIZER(name,count) \
-{ count, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __SEM_DEBUG_INIT(name) }
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void __down(struct semaphore *sem, unsigned long flags);
-extern int  __down_interruptible(struct semaphore *sem, unsigned long flags);
-extern void __up(struct semaphore *sem);
-
-static inline void down(struct semaphore *sem)
-{
-	unsigned long flags;
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	if (likely(sem->counter > 0)) {
-		sem->counter--;
-		spin_unlock_irqrestore(&sem->wait_lock, flags);
-	}
-	else {
-		__down(sem, flags);
-	}
-}
-
-static inline int down_interruptible(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 0;
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	if (likely(sem->counter > 0)) {
-		sem->counter--;
-		spin_unlock_irqrestore(&sem->wait_lock, flags);
-	}
-	else {
-		ret = __down_interruptible(sem, flags);
-	}
-	return ret;
-}
-
-/*
- * non-blockingly attempt to down() a semaphore.
- * - returns zero if we acquired it
- */
-static inline int down_trylock(struct semaphore *sem)
-{
-	unsigned long flags;
-	int success = 0;
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	if (sem->counter > 0) {
-		sem->counter--;
-		success = 1;
-	}
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-	return !success;
-}
-
-static inline void up(struct semaphore *sem)
-{
-	unsigned long flags;
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	if (!list_empty(&sem->wait_list))
-		__up(sem);
-	else
-		sem->counter++;
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
-static inline int sem_getcount(struct semaphore *sem)
-{
-	return sem->counter;
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-h8300/semaphore-helper.h b/include/asm-h8300/semaphore-helper.h
deleted file mode 100644
index 4fea36b..0000000
--- a/include/asm-h8300/semaphore-helper.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef _H8300_SEMAPHORE_HELPER_H
-#define _H8300_SEMAPHORE_HELPER_H
-
-/*
- * SMP- and interrupt-safe semaphores helper functions.
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * based on
- * m68k version by Andreas Schwab
- */
-
-#include <linux/errno.h>
-
-/*
- * These two _must_ execute atomically wrt each other.
- */
-static inline void wake_one_more(struct semaphore * sem)
-{
-	atomic_inc((atomic_t *)&sem->sleepers);
-}
-
-static inline int waking_non_zero(struct semaphore *sem)
-{
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 0;
-	if (sem->sleepers > 0) {
-		sem->sleepers--;
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_interruptible:
- *	1	got the lock
- *	0	go to sleep
- *	-EINTR	interrupted
- */
-static inline int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 0;
-	if (sem->sleepers > 0) {
-		sem->sleepers--;
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_trylock:
- *	1	failed to lock
- *	0	got the lock
- */
-static inline int waking_non_zero_trylock(struct semaphore *sem)
-{
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 1;
-	if (sem->sleepers <= 0)
-		atomic_inc(&sem->count);
-	else {
-		sem->sleepers--;
-		ret = 0;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-#endif
diff --git a/include/asm-h8300/semaphore.h b/include/asm-h8300/semaphore.h
index f3ffff8..d9b2034 100644
--- a/include/asm-h8300/semaphore.h
+++ b/include/asm-h8300/semaphore.h
@@ -1,190 +1 @@
-#ifndef _H8300_SEMAPHORE_H
-#define _H8300_SEMAPHORE_H
-
-#define RW_LOCK_BIAS		 0x01000000
-
-#ifndef __ASSEMBLY__
-
-#include <linux/linkage.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-
-/*
- * Interrupt-safe semaphores..
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * H8/300 version by Yoshinori Sato
- */
-
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER(*sem, val);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-asmlinkage void __down_failed(void /* special register calling convention */);
-asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
-asmlinkage int  __down_failed_trylock(void  /* params in registers */);
-asmlinkage void __up_wakeup(void /* special register calling convention */);
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage int  __down_trylock(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-extern spinlock_t semaphore_wake_lock;
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "down_failed" is a special asm handler that calls the C
- * routine that actually waits. See arch/m68k/lib/semaphore.S
- */
-static inline void down(struct semaphore * sem)
-{
-	register atomic_t *count asm("er0");
-
-	might_sleep();
-
-	count = &(sem->count);
-	__asm__ __volatile__(
-		"stc ccr,r3l\n\t"
-		"orc #0x80,ccr\n\t"
-		"mov.l %2, er1\n\t"
-		"dec.l #1,er1\n\t"
-		"mov.l er1,%0\n\t"
-		"bpl 1f\n\t"
-		"ldc r3l,ccr\n\t"
-		"mov.l %1,er0\n\t"
-		"jsr @___down\n\t"
-		"bra 2f\n"
-		"1:\n\t"
-		"ldc r3l,ccr\n"
-		"2:"
-		: "=m"(*count)
-		: "g"(sem),"m"(*count)
-		: "cc",  "er1", "er2", "er3");
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	register atomic_t *count asm("er0");
-
-	might_sleep();
-
-	count = &(sem->count);
-	__asm__ __volatile__(
-		"stc ccr,r1l\n\t"
-		"orc #0x80,ccr\n\t"
-		"mov.l %3, er2\n\t"
-		"dec.l #1,er2\n\t"
-		"mov.l er2,%1\n\t"
-		"bpl 1f\n\t"
-		"ldc r1l,ccr\n\t"
-		"mov.l %2,er0\n\t"
-		"jsr @___down_interruptible\n\t"
-		"bra 2f\n"
-		"1:\n\t"
-		"ldc r1l,ccr\n\t"
-		"sub.l %0,%0\n\t"
-		"2:\n\t"
-		: "=r" (count),"=m" (*count)
-		: "g"(sem),"m"(*count)
-		: "cc", "er1", "er2", "er3");
-	return (int)count;
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	register atomic_t *count asm("er0");
-
-	count = &(sem->count);
-	__asm__ __volatile__(
-		"stc ccr,r3l\n\t"
-		"orc #0x80,ccr\n\t"
-		"mov.l %3,er2\n\t"
-		"dec.l #1,er2\n\t"
-		"mov.l er2,%0\n\t"
-		"bpl 1f\n\t"
-		"ldc r3l,ccr\n\t"
-		"jmp @3f\n\t"
-		LOCK_SECTION_START(".align 2\n\t")
-		"3:\n\t"
-		"mov.l %2,er0\n\t"
-		"jsr @___down_trylock\n\t"
-		"jmp @2f\n\t"
-		LOCK_SECTION_END
-		"1:\n\t"
-		"ldc r3l,ccr\n\t"
-		"sub.l %1,%1\n"
-		"2:"
-		: "=m" (*count),"=r"(count)
-		: "g"(sem),"m"(*count)
-		: "cc", "er1","er2", "er3");
-	return (int)count;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore * sem)
-{
-	register atomic_t *count asm("er0");
-
-	count = &(sem->count);
-	__asm__ __volatile__(
-		"stc ccr,r3l\n\t"
-		"orc #0x80,ccr\n\t"
-		"mov.l %2,er1\n\t"
-		"inc.l #1,er1\n\t"
-		"mov.l er1,%0\n\t"
-		"ldc r3l,ccr\n\t"
-		"sub.l er2,er2\n\t"
-		"cmp.l er2,er1\n\t"
-		"bgt 1f\n\t"
-		"mov.l %1,er0\n\t"
-		"jsr @___up\n"
-		"1:"
-		: "=m"(*count)
-		: "g"(sem),"m"(*count)
-		: "cc", "er1", "er2", "er3");
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-ia64/semaphore.h b/include/asm-ia64/semaphore.h
index d8393d1..d9b2034 100644
--- a/include/asm-ia64/semaphore.h
+++ b/include/asm-ia64/semaphore.h
@@ -1,99 +1 @@
-#ifndef _ASM_IA64_SEMAPHORE_H
-#define _ASM_IA64_SEMAPHORE_H
-
-/*
- * Copyright (C) 1998-2000 Hewlett-Packard Co
- * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-#include <asm/atomic.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count)					\
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name, count)
-
-#define DECLARE_MUTEX(name)		__DECLARE_SEMAPHORE_GENERIC(name, 1)
-
-static inline void
-sema_init (struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
-}
-
-static inline void
-init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void
-init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void __down (struct semaphore * sem);
-extern int  __down_interruptible (struct semaphore * sem);
-extern int  __down_trylock (struct semaphore * sem);
-extern void __up (struct semaphore * sem);
-
-/*
- * Atomically decrement the semaphore's count.  If it goes negative,
- * block the calling thread in the TASK_UNINTERRUPTIBLE state.
- */
-static inline void
-down (struct semaphore *sem)
-{
-	might_sleep();
-	if (ia64_fetchadd(-1, &sem->count.counter, acq) < 1)
-		__down(sem);
-}
-
-/*
- * Atomically decrement the semaphore's count.  If it goes negative,
- * block the calling thread in the TASK_INTERRUPTIBLE state.
- */
-static inline int
-down_interruptible (struct semaphore * sem)
-{
-	int ret = 0;
-
-	might_sleep();
-	if (ia64_fetchadd(-1, &sem->count.counter, acq) < 1)
-		ret = __down_interruptible(sem);
-	return ret;
-}
-
-static inline int
-down_trylock (struct semaphore *sem)
-{
-	int ret = 0;
-
-	if (ia64_fetchadd(-1, &sem->count.counter, acq) < 1)
-		ret = __down_trylock(sem);
-	return ret;
-}
-
-static inline void
-up (struct semaphore * sem)
-{
-	if (ia64_fetchadd(1, &sem->count.counter, rel) <= -1)
-		__up(sem);
-}
-
-#endif /* _ASM_IA64_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/asm-m32r/semaphore.h b/include/asm-m32r/semaphore.h
index b5bf95a..d9b2034 100644
--- a/include/asm-m32r/semaphore.h
+++ b/include/asm-m32r/semaphore.h
@@ -1,144 +1 @@
-#ifndef _ASM_M32R_SEMAPHORE_H
-#define _ASM_M32R_SEMAPHORE_H
-
-#include <linux/linkage.h>
-
-#ifdef __KERNEL__
-
-/*
- * SMP- and interrupt-safe semaphores..
- *
- * Copyright (C) 1996  Linus Torvalds
- * Copyright (C) 2004, 2006  Hirokazu Takata <takata at linux-m32r.org>
- */
-
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-#include <asm/assembler.h>
-#include <asm/system.h>
-#include <asm/atomic.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-/*
- *	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
- *
- * i'd rather use the more flexible initialization above, but sadly
- * GCC 2.7.2.3 emits a bogus warning. EGCS doesnt. Oh well.
- */
-	atomic_set(&sem->count, val);
-	sem->sleepers = 0;
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-asmlinkage void __down_failed(void /* special register calling convention */);
-asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
-asmlinkage int  __down_failed_trylock(void  /* params in registers */);
-asmlinkage void __up_wakeup(void /* special register calling convention */);
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage int  __down_trylock(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-/*
- * Atomically decrement the semaphore's count.  If it goes negative,
- * block the calling thread in the TASK_UNINTERRUPTIBLE state.
- */
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-	if (unlikely(atomic_dec_return(&sem->count) < 0))
-		__down(sem);
-}
-
-/*
- * Interruptible try to acquire a semaphore.  If we obtained
- * it, return zero.  If we were interrupted, returns -EINTR
- */
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int result = 0;
-
-	might_sleep();
-	if (unlikely(atomic_dec_return(&sem->count) < 0))
-		result = __down_interruptible(sem);
-
-	return result;
-}
-
-/*
- * Non-blockingly attempt to down() a semaphore.
- * Returns zero if we acquired it
- */
-static inline int down_trylock(struct semaphore * sem)
-{
-	unsigned long flags;
-	long count;
-	int result = 0;
-
-	local_irq_save(flags);
-	__asm__ __volatile__ (
-		"# down_trylock			\n\t"
-		DCACHE_CLEAR("%0", "r4", "%1")
-		M32R_LOCK" %0, @%1;		\n\t"
-		"addi	%0, #-1;		\n\t"
-		M32R_UNLOCK" %0, @%1;		\n\t"
-		: "=&r" (count)
-		: "r" (&sem->count)
-		: "memory"
-#ifdef CONFIG_CHIP_M32700_TS1
-		, "r4"
-#endif	/* CONFIG_CHIP_M32700_TS1 */
-	);
-	local_irq_restore(flags);
-
-	if (unlikely(count < 0))
-		result = __down_trylock(sem);
-
-	return result;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore * sem)
-{
-	if (unlikely(atomic_inc_return(&sem->count) <= 0))
-		__up(sem);
-}
-
-#endif  /* __KERNEL__ */
-
-#endif  /* _ASM_M32R_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/asm-m68k/semaphore-helper.h b/include/asm-m68k/semaphore-helper.h
deleted file mode 100644
index eef30ba..0000000
--- a/include/asm-m68k/semaphore-helper.h
+++ /dev/null
@@ -1,142 +0,0 @@
-#ifndef _M68K_SEMAPHORE_HELPER_H
-#define _M68K_SEMAPHORE_HELPER_H
-
-/*
- * SMP- and interrupt-safe semaphores helper functions.
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * m68k version by Andreas Schwab
- */
-
-#include <linux/errno.h>
-
-/*
- * These two _must_ execute atomically wrt each other.
- */
-static inline void wake_one_more(struct semaphore * sem)
-{
-	atomic_inc(&sem->waking);
-}
-
-#ifndef CONFIG_RMW_INSNS
-extern spinlock_t semaphore_wake_lock;
-#endif
-
-static inline int waking_non_zero(struct semaphore *sem)
-{
-	int ret;
-#ifndef CONFIG_RMW_INSNS
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 0;
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-#else
-	int tmp1, tmp2;
-
-	__asm__ __volatile__
-	  ("1:	movel	%1,%2\n"
-	   "    jle	2f\n"
-	   "	subql	#1,%2\n"
-	   "	casl	%1,%2,%3\n"
-	   "	jne	1b\n"
-	   "	moveq	#1,%0\n"
-	   "2:"
-	   : "=d" (ret), "=d" (tmp1), "=d" (tmp2)
-	   : "m" (sem->waking), "0" (0), "1" (sem->waking));
-#endif
-
-	return ret;
-}
-
-/*
- * waking_non_zero_interruptible:
- *	1	got the lock
- *	0	go to sleep
- *	-EINTR	interrupted
- */
-static inline int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	int ret;
-#ifndef CONFIG_RMW_INSNS
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 0;
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-#else
-	int tmp1, tmp2;
-
-	__asm__ __volatile__
-	  ("1:	movel	%1,%2\n"
-	   "	jle	2f\n"
-	   "	subql	#1,%2\n"
-	   "	casl	%1,%2,%3\n"
-	   "	jne	1b\n"
-	   "	moveq	#1,%0\n"
-	   "	jra	%a4\n"
-	   "2:"
-	   : "=d" (ret), "=d" (tmp1), "=d" (tmp2)
-	   : "m" (sem->waking), "i" (&&next), "0" (0), "1" (sem->waking));
-	if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-next:
-#endif
-
-	return ret;
-}
-
-/*
- * waking_non_zero_trylock:
- *	1	failed to lock
- *	0	got the lock
- */
-static inline int waking_non_zero_trylock(struct semaphore *sem)
-{
-	int ret;
-#ifndef CONFIG_RMW_INSNS
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 1;
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 0;
-	} else
-		atomic_inc(&sem->count);
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-#else
-	int tmp1, tmp2;
-
-	__asm__ __volatile__
-	  ("1:	movel	%1,%2\n"
-	   "    jle	2f\n"
-	   "	subql	#1,%2\n"
-	   "	casl	%1,%2,%3\n"
-	   "	jne	1b\n"
-	   "	moveq	#0,%0\n"
-	   "2:"
-	   : "=d" (ret), "=d" (tmp1), "=d" (tmp2)
-	   : "m" (sem->waking), "0" (1), "1" (sem->waking));
-	if (ret)
-		atomic_inc(&sem->count);
-#endif
-	return ret;
-}
-
-#endif
diff --git a/include/asm-m68k/semaphore.h b/include/asm-m68k/semaphore.h
index 64d6b11..d9b2034 100644
--- a/include/asm-m68k/semaphore.h
+++ b/include/asm-m68k/semaphore.h
@@ -1,163 +1 @@
-#ifndef _M68K_SEMAPHORE_H
-#define _M68K_SEMAPHORE_H
-
-#define RW_LOCK_BIAS		 0x01000000
-
-#ifndef __ASSEMBLY__
-
-#include <linux/linkage.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-#include <linux/stringify.h>
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-
-/*
- * Interrupt-safe semaphores..
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * m68k version by Andreas Schwab
- */
-
-
-struct semaphore {
-	atomic_t count;
-	atomic_t waking;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.waking		= ATOMIC_INIT(0),				\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init(struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER(*sem, val);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-asmlinkage void __down_failed(void /* special register calling convention */);
-asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
-asmlinkage int  __down_failed_trylock(void  /* params in registers */);
-asmlinkage void __up_wakeup(void /* special register calling convention */);
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage int  __down_trylock(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "down_failed" is a special asm handler that calls the C
- * routine that actually waits. See arch/m68k/lib/semaphore.S
- */
-static inline void down(struct semaphore *sem)
-{
-	register struct semaphore *sem1 __asm__ ("%a1") = sem;
-
-	might_sleep();
-	__asm__ __volatile__(
-		"| atomic down operation\n\t"
-		"subql #1,%0@\n\t"
-		"jmi 2f\n\t"
-		"1:\n"
-		LOCK_SECTION_START(".even\n\t")
-		"2:\tpea 1b\n\t"
-		"jbra __down_failed\n"
-		LOCK_SECTION_END
-		: /* no outputs */
-		: "a" (sem1)
-		: "memory");
-}
-
-static inline int down_interruptible(struct semaphore *sem)
-{
-	register struct semaphore *sem1 __asm__ ("%a1") = sem;
-	register int result __asm__ ("%d0");
-
-	might_sleep();
-	__asm__ __volatile__(
-		"| atomic interruptible down operation\n\t"
-		"subql #1,%1@\n\t"
-		"jmi 2f\n\t"
-		"clrl %0\n"
-		"1:\n"
-		LOCK_SECTION_START(".even\n\t")
-		"2:\tpea 1b\n\t"
-		"jbra __down_failed_interruptible\n"
-		LOCK_SECTION_END
-		: "=d" (result)
-		: "a" (sem1)
-		: "memory");
-	return result;
-}
-
-static inline int down_trylock(struct semaphore *sem)
-{
-	register struct semaphore *sem1 __asm__ ("%a1") = sem;
-	register int result __asm__ ("%d0");
-
-	__asm__ __volatile__(
-		"| atomic down trylock operation\n\t"
-		"subql #1,%1@\n\t"
-		"jmi 2f\n\t"
-		"clrl %0\n"
-		"1:\n"
-		LOCK_SECTION_START(".even\n\t")
-		"2:\tpea 1b\n\t"
-		"jbra __down_failed_trylock\n"
-		LOCK_SECTION_END
-		: "=d" (result)
-		: "a" (sem1)
-		: "memory");
-	return result;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore *sem)
-{
-	register struct semaphore *sem1 __asm__ ("%a1") = sem;
-
-	__asm__ __volatile__(
-		"| atomic up operation\n\t"
-		"addql #1,%0@\n\t"
-		"jle 2f\n"
-		"1:\n"
-		LOCK_SECTION_START(".even\n\t")
-		"2:\t"
-		"pea 1b\n\t"
-		"jbra __up_wakeup\n"
-		LOCK_SECTION_END
-		: /* no outputs */
-		: "a" (sem1)
-		: "memory");
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-m68knommu/semaphore-helper.h b/include/asm-m68knommu/semaphore-helper.h
deleted file mode 100644
index 43da7bc..0000000
--- a/include/asm-m68knommu/semaphore-helper.h
+++ /dev/null
@@ -1,82 +0,0 @@
-#ifndef _M68K_SEMAPHORE_HELPER_H
-#define _M68K_SEMAPHORE_HELPER_H
-
-/*
- * SMP- and interrupt-safe semaphores helper functions.
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * m68k version by Andreas Schwab
- */
-
-
-/*
- * These two _must_ execute atomically wrt each other.
- */
-static inline void wake_one_more(struct semaphore * sem)
-{
-	atomic_inc(&sem->waking);
-}
-
-static inline int waking_non_zero(struct semaphore *sem)
-{
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 0;
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_interruptible:
- *	1	got the lock
- *	0	go to sleep
- *	-EINTR	interrupted
- */
-static inline int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 0;
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_trylock:
- *	1	failed to lock
- *	0	got the lock
- */
-static inline int waking_non_zero_trylock(struct semaphore *sem)
-{
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	ret = 1;
-	if (atomic_read(&sem->waking) > 0) {
-		atomic_dec(&sem->waking);
-		ret = 0;
-	} else
-		atomic_inc(&sem->count);
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-#endif
diff --git a/include/asm-m68knommu/semaphore.h b/include/asm-m68knommu/semaphore.h
index 5779eb6..d9b2034 100644
--- a/include/asm-m68knommu/semaphore.h
+++ b/include/asm-m68knommu/semaphore.h
@@ -1,153 +1 @@
-#ifndef _M68K_SEMAPHORE_H
-#define _M68K_SEMAPHORE_H
-
-#define RW_LOCK_BIAS		 0x01000000
-
-#ifndef __ASSEMBLY__
-
-#include <linux/linkage.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-
-/*
- * Interrupt-safe semaphores..
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * m68k version by Andreas Schwab
- */
-
-
-struct semaphore {
-	atomic_t count;
-	atomic_t waking;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.waking		= ATOMIC_INIT(0),				\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER(*sem, val);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-asmlinkage void __down_failed(void /* special register calling convention */);
-asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
-asmlinkage int  __down_failed_trylock(void  /* params in registers */);
-asmlinkage void __up_wakeup(void /* special register calling convention */);
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage int  __down_trylock(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-extern spinlock_t semaphore_wake_lock;
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "down_failed" is a special asm handler that calls the C
- * routine that actually waits. See arch/m68k/lib/semaphore.S
- */
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-	__asm__ __volatile__(
-		"| atomic down operation\n\t"
-		"movel	%0, %%a1\n\t"
-		"lea	%%pc@(1f), %%a0\n\t"
-		"subql	#1, %%a1@\n\t"
-		"jmi __down_failed\n"
-		"1:"
-		: /* no outputs */
-		: "g" (sem)
-		: "cc", "%a0", "%a1", "memory");
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int ret;
-
-	might_sleep();
-	__asm__ __volatile__(
-		"| atomic down operation\n\t"
-		"movel	%1, %%a1\n\t"
-		"lea	%%pc@(1f), %%a0\n\t"
-		"subql	#1, %%a1@\n\t"
-		"jmi __down_failed_interruptible\n\t"
-		"clrl	%%d0\n"
-		"1: movel	%%d0, %0\n"
-		: "=d" (ret)
-		: "g" (sem)
-		: "cc", "%d0", "%a0", "%a1", "memory");
-	return(ret);
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	register struct semaphore *sem1 __asm__ ("%a1") = sem;
-	register int result __asm__ ("%d0");
-
-	__asm__ __volatile__(
-		"| atomic down trylock operation\n\t"
-		"subql #1,%1@\n\t"
-		"jmi 2f\n\t"
-		"clrl %0\n"
-		"1:\n"
-		".section .text.lock,\"ax\"\n"
-		".even\n"
-		"2:\tpea 1b\n\t"
-		"jbra __down_failed_trylock\n"
-		".previous"
-		: "=d" (result)
-		: "a" (sem1)
-		: "memory");
-	return result;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore * sem)
-{
-	__asm__ __volatile__(
-		"| atomic up operation\n\t"
-		"movel	%0, %%a1\n\t"
-		"lea	%%pc@(1f), %%a0\n\t"
-		"addql	#1, %%a1@\n\t"
-		"jle __up_wakeup\n"
-		"1:"
-		: /* no outputs */
-		: "g" (sem)
-		: "cc", "%a0", "%a1", "memory");
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-mips/semaphore.h b/include/asm-mips/semaphore.h
index fdf8042..d9b2034 100644
--- a/include/asm-mips/semaphore.h
+++ b/include/asm-mips/semaphore.h
@@ -1,108 +1 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1996  Linus Torvalds
- * Copyright (C) 1998, 99, 2000, 01, 04  Ralf Baechle
- * Copyright (C) 1999, 2000, 01  Silicon Graphics, Inc.
- * Copyright (C) 2000, 01 MIPS Technologies, Inc.
- *
- * In all honesty, little of the old MIPS code left - the PPC64 variant was
- * just looking nice and portable so I ripped it.  Credits to whoever wrote
- * it.
- */
-#ifndef __ASM_SEMAPHORE_H
-#define __ASM_SEMAPHORE_H
-
-/*
- * Remove spinlock-based RW semaphores; RW semaphore definitions are
- * now in rwsem.h and we use the generic lib/rwsem.c implementation.
- * Rework semaphores to use atomic_dec_if_positive.
- * -- Paul Mackerras (paulus@samba.org)
- */
-
-#ifdef __KERNEL__
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	/*
-	 * Note that any negative value of count is equivalent to 0,
-	 * but additionally indicates that some process(es) might be
-	 * sleeping on `wait'.
-	 */
-	atomic_t count;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name, count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name, count)
-
-#define DECLARE_MUTEX(name)		__DECLARE_SEMAPHORE_GENERIC(name, 1)
-
-static inline void sema_init(struct semaphore *sem, int val)
-{
-	atomic_set(&sem->count, val);
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX(struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED(struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void __down(struct semaphore * sem);
-extern int  __down_interruptible(struct semaphore * sem);
-extern void __up(struct semaphore * sem);
-
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-
-	/*
-	 * Try to get the semaphore, take the slow path if we fail.
-	 */
-	if (unlikely(atomic_dec_return(&sem->count) < 0))
-		__down(sem);
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-
-	might_sleep();
-
-	if (unlikely(atomic_dec_return(&sem->count) < 0))
-		ret = __down_interruptible(sem);
-	return ret;
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	return atomic_dec_if_positive(&sem->count) < 0;
-}
-
-static inline void up(struct semaphore * sem)
-{
-	if (unlikely(atomic_inc_return(&sem->count) <= 0))
-		__up(sem);
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/asm-mn10300/semaphore.h b/include/asm-mn10300/semaphore.h
index 5a9e1ad..d9b2034 100644
--- a/include/asm-mn10300/semaphore.h
+++ b/include/asm-mn10300/semaphore.h
@@ -1,169 +1 @@
-/* MN10300 Semaphores
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_SEMAPHORE_H
-#define _ASM_SEMAPHORE_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/linkage.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-
-#define SEMAPHORE_DEBUG		0
-
-/*
- * the semaphore definition
- * - if count is >0 then there are tokens available on the semaphore for down
- *   to collect
- * - if count is <=0 then there are no spare tokens, and anyone that wants one
- *   must wait
- * - if wait_list is not empty, then there are processes waiting for the
- *   semaphore
- */
-struct semaphore {
-	atomic_t		count;		/* it's not really atomic, it's
-						 * just that certain modules
-						 * expect to be able to access
-						 * it directly */
-	spinlock_t		wait_lock;
-	struct list_head	wait_list;
-#if SEMAPHORE_DEBUG
-	unsigned		__magic;
-#endif
-};
-
-#if SEMAPHORE_DEBUG
-# define __SEM_DEBUG_INIT(name) , (long)&(name).__magic
-#else
-# define __SEM_DEBUG_INIT(name)
-#endif
-
-
-#define __SEMAPHORE_INITIALIZER(name, init_count)			\
-{									\
-	.count		= ATOMIC_INIT(init_count),			\
-	.wait_lock	= __SPIN_LOCK_UNLOCKED((name).wait_lock),	\
-	.wait_list	= LIST_HEAD_INIT((name).wait_list)		\
-	__SEM_DEBUG_INIT(name)						\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name, count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name, 1)
-#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name, 0)
-
-static inline void sema_init(struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
-}
-
-static inline void init_MUTEX(struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED(struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void __down(struct semaphore *sem, unsigned long flags);
-extern int  __down_interruptible(struct semaphore *sem, unsigned long flags);
-extern void __up(struct semaphore *sem);
-
-static inline void down(struct semaphore *sem)
-{
-	unsigned long flags;
-	int count;
-
-#if SEMAPHORE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	count = atomic_read(&sem->count);
-	if (likely(count > 0)) {
-		atomic_set(&sem->count, count - 1);
-		spin_unlock_irqrestore(&sem->wait_lock, flags);
-	} else {
-		__down(sem, flags);
-	}
-}
-
-static inline int down_interruptible(struct semaphore *sem)
-{
-	unsigned long flags;
-	int count, ret = 0;
-
-#if SEMAPHORE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	count = atomic_read(&sem->count);
-	if (likely(count > 0)) {
-		atomic_set(&sem->count, count - 1);
-		spin_unlock_irqrestore(&sem->wait_lock, flags);
-	} else {
-		ret = __down_interruptible(sem, flags);
-	}
-	return ret;
-}
-
-/*
- * non-blockingly attempt to down() a semaphore.
- * - returns zero if we acquired it
- */
-static inline int down_trylock(struct semaphore *sem)
-{
-	unsigned long flags;
-	int count, success = 0;
-
-#if SEMAPHORE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	count = atomic_read(&sem->count);
-	if (likely(count > 0)) {
-		atomic_set(&sem->count, count - 1);
-		success = 1;
-	}
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-	return !success;
-}
-
-static inline void up(struct semaphore *sem)
-{
-	unsigned long flags;
-
-#if SEMAPHORE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	spin_lock_irqsave(&sem->wait_lock, flags);
-	if (!list_empty(&sem->wait_list))
-		__up(sem);
-	else
-		atomic_set(&sem->count, atomic_read(&sem->count) + 1);
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
-static inline int sem_getcount(struct semaphore *sem)
-{
-	return atomic_read(&sem->count);
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-parisc/semaphore-helper.h b/include/asm-parisc/semaphore-helper.h
deleted file mode 100644
index 387f7c1..0000000
--- a/include/asm-parisc/semaphore-helper.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _ASM_PARISC_SEMAPHORE_HELPER_H
-#define _ASM_PARISC_SEMAPHORE_HELPER_H
-
-/*
- * SMP- and interrupt-safe semaphores helper functions.
- *
- * (C) Copyright 1996 Linus Torvalds
- * (C) Copyright 1999 Andrea Arcangeli
- */
-
-/*
- * These two _must_ execute atomically wrt each other.
- *
- * This is trivially done with load_locked/store_cond,
- * which we have.  Let the rest of the losers suck eggs.
- */
-static __inline__ void wake_one_more(struct semaphore * sem)
-{
-	atomic_inc((atomic_t *)&sem->waking);
-}
-
-static __inline__ int waking_non_zero(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->waking > 0) {
-		sem->waking--;
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_interruptible:
- *	1	got the lock
- *	0	go to sleep
- *	-EINTR	interrupted
- *
- * We must undo the sem->count down_interruptible() increment while we are
- * protected by the spinlock in order to make atomic this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-static __inline__ int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->waking > 0) {
-		sem->waking--;
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_trylock:
- *	1	failed to lock
- *	0	got the lock
- *
- * We must undo the sem->count down_trylock() increment while we are
- * protected by the spinlock in order to make atomic this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-static __inline__ int waking_non_zero_trylock(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 1;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->waking <= 0)
-		atomic_inc(&sem->count);
-	else {
-		sem->waking--;
-		ret = 0;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-#endif /* _ASM_PARISC_SEMAPHORE_HELPER_H */
diff --git a/include/asm-parisc/semaphore.h b/include/asm-parisc/semaphore.h
index a16271c..d9b2034 100644
--- a/include/asm-parisc/semaphore.h
+++ b/include/asm-parisc/semaphore.h
@@ -1,145 +1 @@
-/*    SMP- and interrupt-safe semaphores.
- *    PA-RISC version by Matthew Wilcox
- *
- *    Linux/PA-RISC Project (http://www.parisc-linux.org/)
- *    Copyright (C) 1996 Linus Torvalds
- *    Copyright (C) 1999-2001 Matthew Wilcox < willy at debian d0T org >
- *    Copyright (C) 2000 Grant Grundler < grundler a debian org >
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef _ASM_PARISC_SEMAPHORE_H
-#define _ASM_PARISC_SEMAPHORE_H
-
-#include <linux/spinlock.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-#include <asm/system.h>
-
-/*
- * The `count' is initialised to the number of people who are allowed to
- * take the lock.  (Normally we want a mutex, so this is `1').  if
- * `count' is positive, the lock can be taken.  if it's 0, no-one is
- * waiting on it.  if it's -1, at least one task is waiting.
- */
-struct semaphore {
-	spinlock_t	sentry;
-	int		count;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.sentry		= SPIN_LOCK_UNLOCKED,				\
-	.count		= n,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-static inline int sem_getcount(struct semaphore *sem)
-{
-	return sem->count;
-}
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-/* Semaphores can be `tried' from irq context.  So we have to disable
- * interrupts while we're messing with the semaphore.  Sorry.
- */
-
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-	spin_lock_irq(&sem->sentry);
-	if (sem->count > 0) {
-		sem->count--;
-	} else {
-		__down(sem);
-	}
-	spin_unlock_irq(&sem->sentry);
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-	might_sleep();
-	spin_lock_irq(&sem->sentry);
-	if (sem->count > 0) {
-		sem->count--;
-	} else {
-		ret = __down_interruptible(sem);
-	}
-	spin_unlock_irq(&sem->sentry);
-	return ret;
-}
-
-/*
- * down_trylock returns 0 on success, 1 if we failed to get the lock.
- * May not sleep, but must preserve irq state
- */
-static inline int down_trylock(struct semaphore * sem)
-{
-	unsigned long flags;
-	int count;
-
-	spin_lock_irqsave(&sem->sentry, flags);
-	count = sem->count - 1;
-	if (count >= 0)
-		sem->count = count;
-	spin_unlock_irqrestore(&sem->sentry, flags);
-	return (count < 0);
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- */
-static inline void up(struct semaphore * sem)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&sem->sentry, flags);
-	if (sem->count < 0) {
-		__up(sem);
-	} else {
-		sem->count++;
-	}
-	spin_unlock_irqrestore(&sem->sentry, flags);
-}
-
-#endif /* _ASM_PARISC_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/asm-powerpc/semaphore.h b/include/asm-powerpc/semaphore.h
index 48dd32e..d9b2034 100644
--- a/include/asm-powerpc/semaphore.h
+++ b/include/asm-powerpc/semaphore.h
@@ -1,94 +1 @@
-#ifndef _ASM_POWERPC_SEMAPHORE_H
-#define _ASM_POWERPC_SEMAPHORE_H
-
-/*
- * Remove spinlock-based RW semaphores; RW semaphore definitions are
- * now in rwsem.h and we use the generic lib/rwsem.c implementation.
- * Rework semaphores to use atomic_dec_if_positive.
- * -- Paul Mackerras (paulus@samba.org)
- */
-
-#ifdef __KERNEL__
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	/*
-	 * Note that any negative value of count is equivalent to 0,
-	 * but additionally indicates that some process(es) might be
-	 * sleeping on `wait'.
-	 */
-	atomic_t count;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name, count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name)		__DECLARE_SEMAPHORE_GENERIC(name, 1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	atomic_set(&sem->count, val);
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void __down(struct semaphore * sem);
-extern int  __down_interruptible(struct semaphore * sem);
-extern void __up(struct semaphore * sem);
-
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-
-	/*
-	 * Try to get the semaphore, take the slow path if we fail.
-	 */
-	if (unlikely(atomic_dec_return(&sem->count) < 0))
-		__down(sem);
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-
-	might_sleep();
-
-	if (unlikely(atomic_dec_return(&sem->count) < 0))
-		ret = __down_interruptible(sem);
-	return ret;
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	return atomic_dec_if_positive(&sem->count) < 0;
-}
-
-static inline void up(struct semaphore * sem)
-{
-	if (unlikely(atomic_inc_return(&sem->count) <= 0))
-		__up(sem);
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_POWERPC_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h
index 123b557..0818ecd 100644
--- a/include/asm-s390/cio.h
+++ b/include/asm-s390/cio.h
@@ -397,6 +397,10 @@
 
 extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo);
 
+/* Function from drivers/s390/cio/chsc.c */
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl);
+int chsc_sstpi(void *page, void *result, size_t size);
+
 #endif
 
 #endif
diff --git a/include/asm-s390/cpu.h b/include/asm-s390/cpu.h
index 352dde1..e5a6a9b 100644
--- a/include/asm-s390/cpu.h
+++ b/include/asm-s390/cpu.h
@@ -22,4 +22,12 @@
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
 
+void s390_idle_leave(void);
+
+static inline void s390_idle_check(void)
+{
+	if ((&__get_cpu_var(s390_idle))->in_idle)
+		s390_idle_leave();
+}
+
 #endif /* _ASM_S390_CPU_H_ */
diff --git a/include/asm-s390/debug.h b/include/asm-s390/debug.h
index c00dd2b..335baf4 100644
--- a/include/asm-s390/debug.h
+++ b/include/asm-s390/debug.h
@@ -73,6 +73,7 @@
 	struct dentry* debugfs_entries[DEBUG_MAX_VIEWS];
 	struct debug_view* views[DEBUG_MAX_VIEWS];	
 	char name[DEBUG_MAX_NAME_LEN];
+	mode_t mode;
 } debug_info_t;
 
 typedef int (debug_header_proc_t) (debug_info_t* id,
@@ -122,6 +123,10 @@
 debug_info_t* debug_register(char* name, int pages, int nr_areas,
                              int buf_size);
 
+debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
+				  int buf_size, mode_t mode, uid_t uid,
+				  gid_t gid);
+
 void debug_unregister(debug_info_t* id);
 
 void debug_set_level(debug_info_t* id, int new_level);
diff --git a/include/asm-s390/extmem.h b/include/asm-s390/extmem.h
index c8802c9..33837d7 100644
--- a/include/asm-s390/extmem.h
+++ b/include/asm-s390/extmem.h
@@ -22,11 +22,12 @@
 #define SEGMENT_SHARED 0
 #define SEGMENT_EXCLUSIVE 1
 
-extern int segment_load (char *name,int segtype,unsigned long *addr,unsigned long *length);
-extern void segment_unload(char *name);
-extern void segment_save(char *name);
-extern int segment_type (char* name);
-extern int segment_modify_shared (char *name, int do_nonshared);
+int segment_load (char *name, int segtype, unsigned long *addr, unsigned long *length);
+void segment_unload(char *name);
+void segment_save(char *name);
+int segment_type (char* name);
+int segment_modify_shared (char *name, int do_nonshared);
+void segment_warning(int rc, char *seg_name);
 
 #endif
 #endif
diff --git a/include/asm-s390/hardirq.h b/include/asm-s390/hardirq.h
index 31beb18..4b7cb96 100644
--- a/include/asm-s390/hardirq.h
+++ b/include/asm-s390/hardirq.h
@@ -32,6 +32,6 @@
 
 #define HARDIRQ_BITS	8
 
-extern void account_ticks(u64 time);
+void clock_comparator_work(void);
 
 #endif /* __ASM_HARDIRQ_H */
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index 801a6fd..5de3efb 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -56,6 +56,8 @@
 #define __LC_IO_INT_WORD                0x0C0
 #define __LC_MCCK_CODE                  0x0E8
 
+#define __LC_LAST_BREAK 		0x110
+
 #define __LC_RETURN_PSW                 0x200
 
 #define __LC_SAVE_AREA                  0xC00
@@ -80,7 +82,6 @@
 #define __LC_CPUID                      0xC60
 #define __LC_CPUADDR                    0xC68
 #define __LC_IPLDEV                     0xC7C
-#define __LC_JIFFY_TIMER		0xC80
 #define __LC_CURRENT			0xC90
 #define __LC_INT_CLOCK			0xC98
 #else /* __s390x__ */
@@ -103,7 +104,6 @@
 #define __LC_CPUID			0xD80
 #define __LC_CPUADDR			0xD88
 #define __LC_IPLDEV                     0xDB8
-#define __LC_JIFFY_TIMER		0xDC0
 #define __LC_CURRENT			0xDD8
 #define __LC_INT_CLOCK			0xDE8
 #endif /* __s390x__ */
@@ -276,7 +276,7 @@
 	/* entry.S sensitive area end */
 
         /* SMP info area: defined by DJB */
-        __u64        jiffy_timer;              /* 0xc80 */
+	__u64	     clock_comparator;	       /* 0xc80 */
 	__u32        ext_call_fast;            /* 0xc88 */
 	__u32        percpu_offset;            /* 0xc8c */
 	__u32        current_task;	       /* 0xc90 */
@@ -368,11 +368,12 @@
 	/* entry.S sensitive area end */
 
         /* SMP info area: defined by DJB */
-        __u64        jiffy_timer;              /* 0xdc0 */
+	__u64	     clock_comparator;	       /* 0xdc0 */
 	__u64        ext_call_fast;            /* 0xdc8 */
 	__u64        percpu_offset;            /* 0xdd0 */
 	__u64        current_task;	       /* 0xdd8 */
-	__u64        softirq_pending;	       /* 0xde0 */
+	__u32	     softirq_pending;	       /* 0xde0 */
+	__u32	     pad_0x0de4;	       /* 0xde4 */
 	__u64        int_clock;                /* 0xde8 */
         __u8         pad12[0xe00-0xdf0];       /* 0xdf0 */
 
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 51d8891..8eaf343 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -175,6 +175,13 @@
 extern void show_registers(struct pt_regs *regs);
 extern void show_code(struct pt_regs *regs);
 extern void show_trace(struct task_struct *task, unsigned long *sp);
+#ifdef CONFIG_64BIT
+extern void show_last_breaking_event(struct pt_regs *regs);
+#else
+static inline void show_last_breaking_event(struct pt_regs *regs)
+{
+}
+#endif
 
 unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h
index 0e7001a..d9b2034 100644
--- a/include/asm-s390/semaphore.h
+++ b/include/asm-s390/semaphore.h
@@ -1,107 +1 @@
-/*
- *  include/asm-s390/semaphore.h
- *
- *  S390 version
- *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *
- *  Derived from "include/asm-i386/semaphore.h"
- *    (C) Copyright 1996 Linus Torvalds
- */
-
-#ifndef _S390_SEMAPHORE_H
-#define _S390_SEMAPHORE_H
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	/*
-	 * Note that any negative value of count is equivalent to 0,
-	 * but additionally indicates that some process(es) might be
-	 * sleeping on `wait'.
-	 */
-	atomic_t count;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name,count) \
-	{ ATOMIC_INIT(count), __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) }
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	atomic_set(&sem->count, val);
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage int  __down_trylock(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-	if (atomic_dec_return(&sem->count) < 0)
-		__down(sem);
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-
-	might_sleep();
-	if (atomic_dec_return(&sem->count) < 0)
-		ret = __down_interruptible(sem);
-	return ret;
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	int old_val, new_val;
-
-	/*
-	 * This inline assembly atomically implements the equivalent
-	 * to the following C code:
-	 *   old_val = sem->count.counter;
-	 *   if ((new_val = old_val) > 0)
-	 *       sem->count.counter = --new_val;
-	 * In the ppc code this is called atomic_dec_if_positive.
-	 */
-	asm volatile(
-		"	l	%0,0(%3)\n"
-		"0:	ltr	%1,%0\n"
-		"	jle	1f\n"
-		"	ahi	%1,-1\n"
-		"	cs	%0,%1,0(%3)\n"
-		"	jl	0b\n"
-		"1:"
-		: "=&d" (old_val), "=&d" (new_val), "=m" (sem->count.counter)
-		: "a" (&sem->count.counter), "m" (sem->count.counter)
-		: "cc", "memory");
-	return old_val <= 0;
-}
-
-static inline void up(struct semaphore * sem)
-{
-	if (atomic_inc_return(&sem->count) <= 0)
-		__up(sem);
-}
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index c7b7432..6f3821a 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -90,6 +90,9 @@
 extern void cpu_die (void) __attribute__ ((noreturn));
 extern int __cpu_up (unsigned int cpu);
 
+extern struct mutex smp_cpu_state_mutex;
+extern int smp_cpu_polarization[];
+
 extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
 	void *info, int wait);
 #endif
diff --git a/include/asm-s390/sysinfo.h b/include/asm-s390/sysinfo.h
new file mode 100644
index 0000000..abe10ae
--- /dev/null
+++ b/include/asm-s390/sysinfo.h
@@ -0,0 +1,116 @@
+/*
+ * definition for store system information stsi
+ *
+ * Copyright IBM Corp. 2001,2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Ulrich Weigand <weigand@de.ibm.com>
+ *		 Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+struct sysinfo_1_1_1 {
+	char reserved_0[32];
+	char manufacturer[16];
+	char type[4];
+	char reserved_1[12];
+	char model_capacity[16];
+	char sequence[16];
+	char plant[4];
+	char model[16];
+	char model_perm_cap[16];
+	char model_temp_cap[16];
+	char model_cap_rating[4];
+	char model_perm_cap_rating[4];
+	char model_temp_cap_rating[4];
+};
+
+struct sysinfo_1_2_1 {
+	char reserved_0[80];
+	char sequence[16];
+	char plant[4];
+	char reserved_1[2];
+	unsigned short cpu_address;
+};
+
+struct sysinfo_1_2_2 {
+	char format;
+	char reserved_0[1];
+	unsigned short acc_offset;
+	char reserved_1[24];
+	unsigned int secondary_capability;
+	unsigned int capability;
+	unsigned short cpus_total;
+	unsigned short cpus_configured;
+	unsigned short cpus_standby;
+	unsigned short cpus_reserved;
+	unsigned short adjustment[0];
+};
+
+struct sysinfo_1_2_2_extension {
+	unsigned int alt_capability;
+	unsigned short alt_adjustment[0];
+};
+
+struct sysinfo_2_2_1 {
+	char reserved_0[80];
+	char sequence[16];
+	char plant[4];
+	unsigned short cpu_id;
+	unsigned short cpu_address;
+};
+
+struct sysinfo_2_2_2 {
+	char reserved_0[32];
+	unsigned short lpar_number;
+	char reserved_1;
+	unsigned char characteristics;
+	unsigned short cpus_total;
+	unsigned short cpus_configured;
+	unsigned short cpus_standby;
+	unsigned short cpus_reserved;
+	char name[8];
+	unsigned int caf;
+	char reserved_2[16];
+	unsigned short cpus_dedicated;
+	unsigned short cpus_shared;
+};
+
+#define LPAR_CHAR_DEDICATED	(1 << 7)
+#define LPAR_CHAR_SHARED	(1 << 6)
+#define LPAR_CHAR_LIMITED	(1 << 5)
+
+struct sysinfo_3_2_2 {
+	char reserved_0[31];
+	unsigned char count;
+	struct {
+		char reserved_0[4];
+		unsigned short cpus_total;
+		unsigned short cpus_configured;
+		unsigned short cpus_standby;
+		unsigned short cpus_reserved;
+		char name[8];
+		unsigned int caf;
+		char cpi[16];
+		char reserved_1[24];
+
+	} vm[8];
+};
+
+static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+	register int r0 asm("0") = (fc << 28) | sel1;
+	register int r1 asm("1") = sel2;
+
+	asm volatile(
+		"   stsi 0(%2)\n"
+		"0: jz   2f\n"
+		"1: lhi  %0,%3\n"
+		"2:\n"
+		EX_TABLE(0b, 1b)
+		: "+d" (r0) : "d" (r1), "a" (sysinfo), "K" (-ENOSYS)
+		: "cc", "memory");
+	return r0;
+}
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index 15aba30..92098df 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -406,6 +406,8 @@
 #define local_mcck_enable()  __set_psw_mask(psw_kernel_bits)
 #define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK)
 
+int stfle(unsigned long long *list, int doublewords);
+
 #ifdef CONFIG_SMP
 
 extern void smp_ctl_set_bit(int cr, int bit);
diff --git a/include/asm-s390/timex.h b/include/asm-s390/timex.h
index 98229db..d744c3d 100644
--- a/include/asm-s390/timex.h
+++ b/include/asm-s390/timex.h
@@ -62,16 +62,18 @@
 	return clk;
 }
 
-static inline void get_clock_extended(void *dest)
+static inline unsigned long long get_clock_xt(void)
 {
-	typedef struct { unsigned long long clk[2]; } __clock_t;
+	unsigned char clk[16];
 
 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
-	asm volatile("stcke %0" : "=Q" (*((__clock_t *)dest)) : : "cc");
+	asm volatile("stcke %0" : "=Q" (clk) : : "cc");
 #else /* __GNUC__ */
-	asm volatile("stcke 0(%1)" : "=m" (*((__clock_t *)dest))
-				   : "a" ((__clock_t *)dest) : "cc");
+	asm volatile("stcke 0(%1)" : "=m" (clk)
+				   : "a" (clk) : "cc");
 #endif /* __GNUC__ */
+
+	return *((unsigned long long *)&clk[1]);
 }
 
 static inline cycles_t get_cycles(void)
@@ -81,5 +83,6 @@
 
 int get_sync_clock(unsigned long long *clock);
 void init_cpu_timer(void);
+unsigned long long monotonic_clock(void);
 
 #endif
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 35fb4f9..9e57a93 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -13,12 +13,14 @@
 	asm volatile("ptlb" : : : "memory");
 }
 
+#ifdef CONFIG_SMP
 /*
  * Flush all tlb entries on all cpus.
  */
+void smp_ptlb_all(void);
+
 static inline void __tlb_flush_global(void)
 {
-	extern void smp_ptlb_all(void);
 	register unsigned long reg2 asm("2");
 	register unsigned long reg3 asm("3");
 	register unsigned long reg4 asm("4");
@@ -39,6 +41,25 @@
 		: : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
 }
 
+static inline void __tlb_flush_full(struct mm_struct *mm)
+{
+	cpumask_t local_cpumask;
+
+	preempt_disable();
+	/*
+	 * If the process only ran on the local cpu, do a local flush.
+	 */
+	local_cpumask = cpumask_of_cpu(smp_processor_id());
+	if (cpus_equal(mm->cpu_vm_mask, local_cpumask))
+		__tlb_flush_local();
+	else
+		__tlb_flush_global();
+	preempt_enable();
+}
+#else
+#define __tlb_flush_full(mm)	__tlb_flush_local()
+#endif
+
 /*
  * Flush all tlb entries of a page table on all cpus.
  */
@@ -51,8 +72,6 @@
 
 static inline void __tlb_flush_mm(struct mm_struct * mm)
 {
-	cpumask_t local_cpumask;
-
 	if (unlikely(cpus_empty(mm->cpu_vm_mask)))
 		return;
 	/*
@@ -69,16 +88,7 @@
 				 mm->context.asce_bits);
 		return;
 	}
-	preempt_disable();
-	/*
-	 * If the process only ran on the local cpu, do a local flush.
-	 */
-	local_cpumask = cpumask_of_cpu(smp_processor_id());
-	if (cpus_equal(mm->cpu_vm_mask, local_cpumask))
-		__tlb_flush_local();
-	else
-		__tlb_flush_global();
-	preempt_enable();
+	__tlb_flush_full(mm);
 }
 
 static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
diff --git a/include/asm-s390/topology.h b/include/asm-s390/topology.h
index 613aa64..8e97b06 100644
--- a/include/asm-s390/topology.h
+++ b/include/asm-s390/topology.h
@@ -1,6 +1,29 @@
 #ifndef _ASM_S390_TOPOLOGY_H
 #define _ASM_S390_TOPOLOGY_H
 
+#include <linux/cpumask.h>
+
+#define mc_capable()	(1)
+
+cpumask_t cpu_coregroup_map(unsigned int cpu);
+
+int topology_set_cpu_management(int fc);
+void topology_schedule_update(void);
+
+#define POLARIZATION_UNKNWN	(-1)
+#define POLARIZATION_HRZ	(0)
+#define POLARIZATION_VL		(1)
+#define POLARIZATION_VM		(2)
+#define POLARIZATION_VH		(3)
+
+#ifdef CONFIG_SMP
+void s390_init_cpu_topology(void);
+#else
+static inline void s390_init_cpu_topology(void)
+{
+};
+#endif
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/include/asm-sh/semaphore-helper.h b/include/asm-sh/semaphore-helper.h
deleted file mode 100644
index bd8230c..0000000
--- a/include/asm-sh/semaphore-helper.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef __ASM_SH_SEMAPHORE_HELPER_H
-#define __ASM_SH_SEMAPHORE_HELPER_H
-
-/*
- * SMP- and interrupt-safe semaphores helper functions.
- *
- * (C) Copyright 1996 Linus Torvalds
- * (C) Copyright 1999 Andrea Arcangeli
- */
-
-/*
- * These two _must_ execute atomically wrt each other.
- *
- * This is trivially done with load_locked/store_cond,
- * which we have.  Let the rest of the losers suck eggs.
- */
-static __inline__ void wake_one_more(struct semaphore * sem)
-{
-	atomic_inc((atomic_t *)&sem->sleepers);
-}
-
-static __inline__ int waking_non_zero(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->sleepers > 0) {
-		sem->sleepers--;
-		ret = 1;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_interruptible:
- *	1	got the lock
- *	0	go to sleep
- *	-EINTR	interrupted
- *
- * We must undo the sem->count down_interruptible() increment while we are
- * protected by the spinlock in order to make atomic this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-static __inline__ int waking_non_zero_interruptible(struct semaphore *sem,
-						struct task_struct *tsk)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->sleepers > 0) {
-		sem->sleepers--;
-		ret = 1;
-	} else if (signal_pending(tsk)) {
-		atomic_inc(&sem->count);
-		ret = -EINTR;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-/*
- * waking_non_zero_trylock:
- *	1	failed to lock
- *	0	got the lock
- *
- * We must undo the sem->count down_trylock() increment while we are
- * protected by the spinlock in order to make atomic this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-static __inline__ int waking_non_zero_trylock(struct semaphore *sem)
-{
-	unsigned long flags;
-	int ret = 1;
-
-	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->sleepers <= 0)
-		atomic_inc(&sem->count);
-	else {
-		sem->sleepers--;
-		ret = 0;
-	}
-	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
-	return ret;
-}
-
-#endif /* __ASM_SH_SEMAPHORE_HELPER_H */
diff --git a/include/asm-sh/semaphore.h b/include/asm-sh/semaphore.h
index 9e5a37c..d9b2034 100644
--- a/include/asm-sh/semaphore.h
+++ b/include/asm-sh/semaphore.h
@@ -1,115 +1 @@
-#ifndef __ASM_SH_SEMAPHORE_H
-#define __ASM_SH_SEMAPHORE_H
-
-#include <linux/linkage.h>
-
-#ifdef __KERNEL__
-/*
- * SMP- and interrupt-safe semaphores.
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * SuperH verison by Niibe Yutaka
- *  (Currently no asm implementation but generic C code...)
- */
-
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
-#include <linux/wait.h>
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-/*
- *	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
- *
- * i'd rather use the more flexible initialization above, but sadly
- * GCC 2.7.2.3 emits a bogus warning. EGCS doesn't. Oh well.
- */
-	atomic_set(&sem->count, val);
-	sem->sleepers = 0;
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-#if 0
-asmlinkage void __down_failed(void /* special register calling convention */);
-asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
-asmlinkage int  __down_failed_trylock(void  /* params in registers */);
-asmlinkage void __up_wakeup(void /* special register calling convention */);
-#endif
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage int  __down_trylock(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-extern spinlock_t semaphore_wake_lock;
-
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-	if (atomic_dec_return(&sem->count) < 0)
-		__down(sem);
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-
-	might_sleep();
-	if (atomic_dec_return(&sem->count) < 0)
-		ret = __down_interruptible(sem);
-	return ret;
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	int ret = 0;
-
-	if (atomic_dec_return(&sem->count) < 0)
-		ret = __down_trylock(sem);
-	return ret;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- */
-static inline void up(struct semaphore * sem)
-{
-	if (atomic_inc_return(&sem->count) <= 0)
-		__up(sem);
-}
-
-#endif
-#endif /* __ASM_SH_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/asm-sparc/semaphore.h b/include/asm-sparc/semaphore.h
index 8018f9f..d9b2034 100644
--- a/include/asm-sparc/semaphore.h
+++ b/include/asm-sparc/semaphore.h
@@ -1,192 +1 @@
-#ifndef _SPARC_SEMAPHORE_H
-#define _SPARC_SEMAPHORE_H
-
-/* Dinky, good for nothing, just barely irq safe, Sparc semaphores. */
-
-#ifdef __KERNEL__
-
-#include <asm/atomic.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	atomic24_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC24_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	atomic24_set(&sem->count, val);
-	sem->sleepers = 0;
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void __down(struct semaphore * sem);
-extern int __down_interruptible(struct semaphore * sem);
-extern int __down_trylock(struct semaphore * sem);
-extern void __up(struct semaphore * sem);
-
-static inline void down(struct semaphore * sem)
-{
-	register volatile int *ptr asm("g1");
-	register int increment asm("g2");
-
-	might_sleep();
-
-	ptr = &(sem->count.counter);
-	increment = 1;
-
-	__asm__ __volatile__(
-	"mov	%%o7, %%g4\n\t"
-	"call	___atomic24_sub\n\t"
-	" add	%%o7, 8, %%o7\n\t"
-	"tst	%%g2\n\t"
-	"bl	2f\n\t"
-	" nop\n"
-	"1:\n\t"
-	".subsection 2\n"
-	"2:\n\t"
-	"save	%%sp, -64, %%sp\n\t"
-	"mov	%%g1, %%l1\n\t"
-	"mov	%%g5, %%l5\n\t"
-	"call	%3\n\t"
-	" mov	%%g1, %%o0\n\t"
-	"mov	%%l1, %%g1\n\t"
-	"ba	1b\n\t"
-	" restore %%l5, %%g0, %%g5\n\t"
-	".previous\n"
-	: "=&r" (increment)
-	: "0" (increment), "r" (ptr), "i" (__down)
-	: "g3", "g4", "g7", "memory", "cc");
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	register volatile int *ptr asm("g1");
-	register int increment asm("g2");
-
-	might_sleep();
-
-	ptr = &(sem->count.counter);
-	increment = 1;
-
-	__asm__ __volatile__(
-	"mov	%%o7, %%g4\n\t"
-	"call	___atomic24_sub\n\t"
-	" add	%%o7, 8, %%o7\n\t"
-	"tst	%%g2\n\t"
-	"bl	2f\n\t"
-	" clr	%%g2\n"
-	"1:\n\t"
-	".subsection 2\n"
-	"2:\n\t"
-	"save	%%sp, -64, %%sp\n\t"
-	"mov	%%g1, %%l1\n\t"
-	"mov	%%g5, %%l5\n\t"
-	"call	%3\n\t"
-	" mov	%%g1, %%o0\n\t"
-	"mov	%%l1, %%g1\n\t"
-	"mov	%%l5, %%g5\n\t"
-	"ba	1b\n\t"
-	" restore %%o0, %%g0, %%g2\n\t"
-	".previous\n"
-	: "=&r" (increment)
-	: "0" (increment), "r" (ptr), "i" (__down_interruptible)
-	: "g3", "g4", "g7", "memory", "cc");
-
-	return increment;
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	register volatile int *ptr asm("g1");
-	register int increment asm("g2");
-
-	ptr = &(sem->count.counter);
-	increment = 1;
-
-	__asm__ __volatile__(
-	"mov	%%o7, %%g4\n\t"
-	"call	___atomic24_sub\n\t"
-	" add	%%o7, 8, %%o7\n\t"
-	"tst	%%g2\n\t"
-	"bl	2f\n\t"
-	" clr	%%g2\n"
-	"1:\n\t"
-	".subsection 2\n"
-	"2:\n\t"
-	"save	%%sp, -64, %%sp\n\t"
-	"mov	%%g1, %%l1\n\t"
-	"mov	%%g5, %%l5\n\t"
-	"call	%3\n\t"
-	" mov	%%g1, %%o0\n\t"
-	"mov	%%l1, %%g1\n\t"
-	"mov	%%l5, %%g5\n\t"
-	"ba	1b\n\t"
-	" restore %%o0, %%g0, %%g2\n\t"
-	".previous\n"
-	: "=&r" (increment)
-	: "0" (increment), "r" (ptr), "i" (__down_trylock)
-	: "g3", "g4", "g7", "memory", "cc");
-
-	return increment;
-}
-
-static inline void up(struct semaphore * sem)
-{
-	register volatile int *ptr asm("g1");
-	register int increment asm("g2");
-
-	ptr = &(sem->count.counter);
-	increment = 1;
-
-	__asm__ __volatile__(
-	"mov	%%o7, %%g4\n\t"
-	"call	___atomic24_add\n\t"
-	" add	%%o7, 8, %%o7\n\t"
-	"tst	%%g2\n\t"
-	"ble	2f\n\t"
-	" nop\n"
-	"1:\n\t"
-	".subsection 2\n"
-	"2:\n\t"
-	"save	%%sp, -64, %%sp\n\t"
-	"mov	%%g1, %%l1\n\t"
-	"mov	%%g5, %%l5\n\t"
-	"call	%3\n\t"
-	" mov	%%g1, %%o0\n\t"
-	"mov	%%l1, %%g1\n\t"
-	"ba	1b\n\t"
-	" restore %%l5, %%g0, %%g5\n\t"
-	".previous\n"
-	: "=&r" (increment)
-	: "0" (increment), "r" (ptr), "i" (__up)
-	: "g3", "g4", "g7", "memory", "cc");
-}	
-
-#endif /* __KERNEL__ */
-
-#endif /* !(_SPARC_SEMAPHORE_H) */
+#include <linux/semaphore.h>
diff --git a/include/asm-sparc64/semaphore.h b/include/asm-sparc64/semaphore.h
index 7f7c0c4..d9b2034 100644
--- a/include/asm-sparc64/semaphore.h
+++ b/include/asm-sparc64/semaphore.h
@@ -1,53 +1 @@
-#ifndef _SPARC64_SEMAPHORE_H
-#define _SPARC64_SEMAPHORE_H
-
-/* These are actually reasonable on the V9.
- *
- * See asm-ppc/semaphore.h for implementation commentary,
- * only sparc64 specific issues are commented here.
- */
-#ifdef __KERNEL__
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	atomic_t count;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, count) \
-	{ ATOMIC_INIT(count), \
-	  __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) }
-
-#define __DECLARE_SEMAPHORE_GENERIC(name, count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name)		__DECLARE_SEMAPHORE_GENERIC(name, 1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	atomic_set(&sem->count, val);
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void up(struct semaphore *sem);
-extern void down(struct semaphore *sem);
-extern int down_trylock(struct semaphore *sem);
-extern int down_interruptible(struct semaphore *sem);
-
-#endif /* __KERNEL__ */
-
-#endif /* !(_SPARC64_SEMAPHORE_H) */
+#include <linux/semaphore.h>
diff --git a/include/asm-um/semaphore.h b/include/asm-um/semaphore.h
index ff13c34..d9b2034 100644
--- a/include/asm-um/semaphore.h
+++ b/include/asm-um/semaphore.h
@@ -1,6 +1 @@
-#ifndef __UM_SEMAPHORE_H
-#define __UM_SEMAPHORE_H
-
-#include "asm/arch/semaphore.h"
-
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-v850/semaphore.h b/include/asm-v850/semaphore.h
index 10ed0cc..d9b2034 100644
--- a/include/asm-v850/semaphore.h
+++ b/include/asm-v850/semaphore.h
@@ -1,84 +1 @@
-#ifndef __V850_SEMAPHORE_H__
-#define __V850_SEMAPHORE_H__
-
-#include <linux/linkage.h>
-#include <linux/spinlock.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-#include <asm/atomic.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name,count)				      \
-	{ ATOMIC_INIT (count), 0,					      \
-	  __WAIT_QUEUE_HEAD_INITIALIZER ((name).wait) }
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count)	\
-	struct semaphore name = __SEMAPHORE_INITIALIZER (name,count)
-
-#define DECLARE_MUTEX(name)		__DECLARE_SEMAPHORE_GENERIC (name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init (sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init (sem, 0);
-}
-
-/*
- * special register calling convention
- */
-asmlinkage void __down_failed (void);
-asmlinkage int  __down_interruptible_failed (void);
-asmlinkage int  __down_trylock_failed (void);
-asmlinkage void __up_wakeup (void);
-
-extern void __down (struct semaphore * sem);
-extern int  __down_interruptible (struct semaphore * sem);
-extern int  __down_trylock (struct semaphore * sem);
-extern void __up (struct semaphore * sem);
-
-static inline void down (struct semaphore * sem)
-{
-	might_sleep();
-	if (atomic_dec_return (&sem->count) < 0)
-		__down (sem);
-}
-
-static inline int down_interruptible (struct semaphore * sem)
-{
-	int ret = 0;
-	might_sleep();
-	if (atomic_dec_return (&sem->count) < 0)
-		ret = __down_interruptible (sem);
-	return ret;
-}
-
-static inline int down_trylock (struct semaphore *sem)
-{
-	int ret = 0;
-	if (atomic_dec_return (&sem->count) < 0)
-		ret = __down_trylock (sem);
-	return ret;
-}
-
-static inline void up (struct semaphore * sem)
-{
-	if (atomic_inc_return (&sem->count) <= 0)
-		__up (sem);
-}
-
-#endif /* __V850_SEMAPHORE_H__ */
+#include <linux/semaphore.h>
diff --git a/include/asm-x86/semaphore.h b/include/asm-x86/semaphore.h
index 572c0b6..d9b2034 100644
--- a/include/asm-x86/semaphore.h
+++ b/include/asm-x86/semaphore.h
@@ -1,5 +1 @@
-#ifdef CONFIG_X86_32
-# include "semaphore_32.h"
-#else
-# include "semaphore_64.h"
-#endif
+#include <linux/semaphore.h>
diff --git a/include/asm-x86/semaphore_32.h b/include/asm-x86/semaphore_32.h
deleted file mode 100644
index ac96d38..0000000
--- a/include/asm-x86/semaphore_32.h
+++ /dev/null
@@ -1,175 +0,0 @@
-#ifndef _I386_SEMAPHORE_H
-#define _I386_SEMAPHORE_H
-
-#include <linux/linkage.h>
-
-#ifdef __KERNEL__
-
-/*
- * SMP- and interrupt-safe semaphores..
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * Modified 1996-12-23 by Dave Grothe <dave@gcom.com> to fix bugs in
- *                     the original code and to make semaphore waits
- *                     interruptible so that processes waiting on
- *                     semaphores can be killed.
- * Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
- *		       functions in asm/sempahore-helper.h while fixing a
- *		       potential and subtle race discovered by Ulrich Schmid
- *		       in down_interruptible(). Since I started to play here I
- *		       also implemented the `trylock' semaphore operation.
- *          1999-07-02 Artur Skawina <skawina@geocities.com>
- *                     Optimized "0(ecx)" -> "(ecx)" (the assembler does not
- *                     do this). Changed calling sequences from push/jmp to
- *                     traditional call/ret.
- * Modified 2001-01-01 Andreas Franck <afranck@gmx.de>
- *		       Some hacks to ensure compatibility with recent
- *		       GCC snapshots, to avoid stack corruption when compiling
- *		       with -fomit-frame-pointer. It's not sure if this will
- *		       be fixed in GCC, as our previous implementation was a
- *		       bit dubious.
- *
- * If you would like to see an analysis of this implementation, please
- * ftp to gcom.com and download the file
- * /pub/linux/src/semaphore/semaphore-2.0.24.tar.gz.
- *
- */
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-/*
- *	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
- *
- * i'd rather use the more flexible initialization above, but sadly
- * GCC 2.7.2.3 emits a bogus warning. EGCS doesn't. Oh well.
- */
-	atomic_set(&sem->count, val);
-	sem->sleepers = 0;
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern asmregparm void __down_failed(atomic_t *count_ptr);
-extern asmregparm int  __down_failed_interruptible(atomic_t *count_ptr);
-extern asmregparm int  __down_failed_trylock(atomic_t *count_ptr);
-extern asmregparm void __up_wakeup(atomic_t *count_ptr);
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "__down_failed" is a special asm handler that calls the C
- * routine that actually waits. See arch/i386/kernel/semaphore.c
- */
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-	__asm__ __volatile__(
-		"# atomic down operation\n\t"
-		LOCK_PREFIX "decl %0\n\t"     /* --sem->count */
-		"jns 2f\n"
-		"\tlea %0,%%eax\n\t"
-		"call __down_failed\n"
-		"2:"
-		:"+m" (sem->count)
-		:
-		:"memory","ax");
-}
-
-/*
- * Interruptible try to acquire a semaphore.  If we obtained
- * it, return zero.  If we were interrupted, returns -EINTR
- */
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int result;
-
-	might_sleep();
-	__asm__ __volatile__(
-		"# atomic interruptible down operation\n\t"
-		"xorl %0,%0\n\t"
-		LOCK_PREFIX "decl %1\n\t"     /* --sem->count */
-		"jns 2f\n\t"
-		"lea %1,%%eax\n\t"
-		"call __down_failed_interruptible\n"
-		"2:"
-		:"=&a" (result), "+m" (sem->count)
-		:
-		:"memory");
-	return result;
-}
-
-/*
- * Non-blockingly attempt to down() a semaphore.
- * Returns zero if we acquired it
- */
-static inline int down_trylock(struct semaphore * sem)
-{
-	int result;
-
-	__asm__ __volatile__(
-		"# atomic interruptible down operation\n\t"
-		"xorl %0,%0\n\t"
-		LOCK_PREFIX "decl %1\n\t"     /* --sem->count */
-		"jns 2f\n\t"
-		"lea %1,%%eax\n\t"
-		"call __down_failed_trylock\n\t"
-		"2:\n"
-		:"=&a" (result), "+m" (sem->count)
-		:
-		:"memory");
-	return result;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- */
-static inline void up(struct semaphore * sem)
-{
-	__asm__ __volatile__(
-		"# atomic up operation\n\t"
-		LOCK_PREFIX "incl %0\n\t"     /* ++sem->count */
-		"jg 1f\n\t"
-		"lea %0,%%eax\n\t"
-		"call __up_wakeup\n"
-		"1:"
-		:"+m" (sem->count)
-		:
-		:"memory","ax");
-}
-
-#endif
-#endif
diff --git a/include/asm-x86/semaphore_64.h b/include/asm-x86/semaphore_64.h
deleted file mode 100644
index 7969430..0000000
--- a/include/asm-x86/semaphore_64.h
+++ /dev/null
@@ -1,180 +0,0 @@
-#ifndef _X86_64_SEMAPHORE_H
-#define _X86_64_SEMAPHORE_H
-
-#include <linux/linkage.h>
-
-#ifdef __KERNEL__
-
-/*
- * SMP- and interrupt-safe semaphores..
- *
- * (C) Copyright 1996 Linus Torvalds
- *
- * Modified 1996-12-23 by Dave Grothe <dave@gcom.com> to fix bugs in
- *                     the original code and to make semaphore waits
- *                     interruptible so that processes waiting on
- *                     semaphores can be killed.
- * Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
- *		       functions in asm/sempahore-helper.h while fixing a
- *		       potential and subtle race discovered by Ulrich Schmid
- *		       in down_interruptible(). Since I started to play here I
- *		       also implemented the `trylock' semaphore operation.
- *          1999-07-02 Artur Skawina <skawina@geocities.com>
- *                     Optimized "0(ecx)" -> "(ecx)" (the assembler does not
- *                     do this). Changed calling sequences from push/jmp to
- *                     traditional call/ret.
- * Modified 2001-01-01 Andreas Franck <afranck@gmx.de>
- *		       Some hacks to ensure compatibility with recent
- *		       GCC snapshots, to avoid stack corruption when compiling
- *		       with -fomit-frame-pointer. It's not sure if this will
- *		       be fixed in GCC, as our previous implementation was a
- *		       bit dubious.
- *
- * If you would like to see an analysis of this implementation, please
- * ftp to gcom.com and download the file
- * /pub/linux/src/semaphore/semaphore-2.0.24.tar.gz.
- *
- */
-
-#include <asm/system.h>
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-#include <linux/stringify.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-/*
- *	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
- *
- * i'd rather use the more flexible initialization above, but sadly
- * GCC 2.7.2.3 emits a bogus warning. EGCS doesn't. Oh well.
- */
-	atomic_set(&sem->count, val);
-	sem->sleepers = 0;
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-asmlinkage void __down_failed(void /* special register calling convention */);
-asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
-asmlinkage int  __down_failed_trylock(void  /* params in registers */);
-asmlinkage void __up_wakeup(void /* special register calling convention */);
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage int  __down_trylock(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-/*
- * This is ugly, but we want the default case to fall through.
- * "__down_failed" is a special asm handler that calls the C
- * routine that actually waits. See arch/x86_64/kernel/semaphore.c
- */
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-
-	__asm__ __volatile__(
-		"# atomic down operation\n\t"
-		LOCK_PREFIX "decl %0\n\t"     /* --sem->count */
-		"jns 1f\n\t"
-		"call __down_failed\n"
-		"1:"
-		:"=m" (sem->count)
-		:"D" (sem)
-		:"memory");
-}
-
-/*
- * Interruptible try to acquire a semaphore.  If we obtained
- * it, return zero.  If we were interrupted, returns -EINTR
- */
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int result;
-
-	might_sleep();
-
-	__asm__ __volatile__(
-		"# atomic interruptible down operation\n\t"
-		"xorl %0,%0\n\t"
-		LOCK_PREFIX "decl %1\n\t"     /* --sem->count */
-		"jns 2f\n\t"
-		"call __down_failed_interruptible\n"
-		"2:\n"
-		:"=&a" (result), "=m" (sem->count)
-		:"D" (sem)
-		:"memory");
-	return result;
-}
-
-/*
- * Non-blockingly attempt to down() a semaphore.
- * Returns zero if we acquired it
- */
-static inline int down_trylock(struct semaphore * sem)
-{
-	int result;
-
-	__asm__ __volatile__(
-		"# atomic interruptible down operation\n\t"
-		"xorl %0,%0\n\t"
-		LOCK_PREFIX "decl %1\n\t"     /* --sem->count */
-		"jns 2f\n\t"
-		"call __down_failed_trylock\n\t"
-		"2:\n"
-		:"=&a" (result), "=m" (sem->count)
-		:"D" (sem)
-		:"memory","cc");
-	return result;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- * The default case (no contention) will result in NO
- * jumps for both down() and up().
- */
-static inline void up(struct semaphore * sem)
-{
-	__asm__ __volatile__(
-		"# atomic up operation\n\t"
-		LOCK_PREFIX "incl %0\n\t"     /* ++sem->count */
-		"jg 1f\n\t"
-		"call __up_wakeup\n"
-		"1:"
-		:"=m" (sem->count)
-		:"D" (sem)
-		:"memory");
-}
-#endif /* __KERNEL__ */
-#endif
diff --git a/include/asm-xtensa/semaphore.h b/include/asm-xtensa/semaphore.h
index 3e04167..d9b2034 100644
--- a/include/asm-xtensa/semaphore.h
+++ b/include/asm-xtensa/semaphore.h
@@ -1,99 +1 @@
-/*
- * linux/include/asm-xtensa/semaphore.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_SEMAPHORE_H
-#define _XTENSA_SEMAPHORE_H
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	atomic_t count;
-	int sleepers;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name,n)					\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.sleepers	= 0,						\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) 			\
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	atomic_set(&sem->count, val);
-	sem->sleepers = 0;
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-asmlinkage void __down(struct semaphore * sem);
-asmlinkage int  __down_interruptible(struct semaphore * sem);
-asmlinkage int  __down_trylock(struct semaphore * sem);
-asmlinkage void __up(struct semaphore * sem);
-
-extern spinlock_t semaphore_wake_lock;
-
-static inline void down(struct semaphore * sem)
-{
-	might_sleep();
-
-	if (atomic_sub_return(1, &sem->count) < 0)
-		__down(sem);
-}
-
-static inline int down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-
-	might_sleep();
-
-	if (atomic_sub_return(1, &sem->count) < 0)
-		ret = __down_interruptible(sem);
-	return ret;
-}
-
-static inline int down_trylock(struct semaphore * sem)
-{
-	int ret = 0;
-
-	if (atomic_sub_return(1, &sem->count) < 0)
-		ret = __down_trylock(sem);
-	return ret;
-}
-
-/*
- * Note! This is subtle. We jump to wake people up only if
- * the semaphore was negative (== somebody was waiting on it).
- */
-static inline void up(struct semaphore * sem)
-{
-	if (atomic_add_return(1, &sem->count) <= 0)
-		__up(sem);
-}
-
-#endif /* _XTENSA_SEMAPHORE_H */
+#include <linux/semaphore.h>
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f8ab4ce..b5fef13 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -102,6 +102,25 @@
 extern void disable_irq(unsigned int irq);
 extern void enable_irq(unsigned int irq);
 
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
+
+extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern int irq_can_set_affinity(unsigned int irq);
+
+#else /* CONFIG_SMP */
+
+static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+{
+	return -EINVAL;
+}
+
+static inline int irq_can_set_affinity(unsigned int irq)
+{
+	return 0;
+}
+
+#endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
+
 #ifdef CONFIG_GENERIC_HARDIRQS
 /*
  * Special lockdep variants of irq disabling/enabling.
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 176e5e7..1883a85 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -228,21 +228,11 @@
 
 #endif /* CONFIG_GENERIC_PENDING_IRQ */
 
-extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
-extern int irq_can_set_affinity(unsigned int irq);
-
 #else /* CONFIG_SMP */
 
 #define move_native_irq(x)
 #define move_masked_irq(x)
 
-static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
-{
-	return -EINVAL;
-}
-
-static inline int irq_can_set_affinity(unsigned int irq) { return 0; }
-
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_IRQBALANCE
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 7d1eaa9..77323a7 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -81,7 +81,7 @@
 	MLX4_CMD_SW2HW_CQ	 = 0x16,
 	MLX4_CMD_HW2SW_CQ	 = 0x17,
 	MLX4_CMD_QUERY_CQ	 = 0x18,
-	MLX4_CMD_RESIZE_CQ	 = 0x2c,
+	MLX4_CMD_MODIFY_CQ	 = 0x2c,
 
 	/* SRQ commands */
 	MLX4_CMD_SW2HW_SRQ	 = 0x35,
diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h
index 0181e0a..071cf96 100644
--- a/include/linux/mlx4/cq.h
+++ b/include/linux/mlx4/cq.h
@@ -45,11 +45,11 @@
 	u8			sl;
 	u8			reserved1;
 	__be16			rlid;
-	u32			reserved2;
+	__be32			ipoib_status;
 	__be32			byte_cnt;
 	__be16			wqe_index;
 	__be16			checksum;
-	u8			reserved3[3];
+	u8			reserved2[3];
 	u8			owner_sr_opcode;
 };
 
@@ -85,6 +85,16 @@
 	MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR		= 0x22,
 };
 
+enum {
+	MLX4_CQE_IPOIB_STATUS_IPV4			= 1 << 22,
+	MLX4_CQE_IPOIB_STATUS_IPV4F			= 1 << 23,
+	MLX4_CQE_IPOIB_STATUS_IPV6			= 1 << 24,
+	MLX4_CQE_IPOIB_STATUS_IPV4OPT			= 1 << 25,
+	MLX4_CQE_IPOIB_STATUS_TCP			= 1 << 26,
+	MLX4_CQE_IPOIB_STATUS_UDP			= 1 << 27,
+	MLX4_CQE_IPOIB_STATUS_IPOK			= 1 << 28,
+};
+
 static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd,
 			       void __iomem *uar_page,
 			       spinlock_t *doorbell_lock)
@@ -120,4 +130,9 @@
 	MLX4_CQ_DB_REQ_NOT		= 2 << 24
 };
 
+int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
+		   u16 count, u16 period);
+int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
+		   int entries, struct mlx4_mtt *mtt);
+
 #endif /* MLX4_CQ_H */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6cdf813..ff7df1a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -186,6 +186,7 @@
 	u32			flags;
 	u16			stat_rate_support;
 	u8			port_width_cap[MLX4_MAX_PORTS + 1];
+	int			max_gso_sz;
 };
 
 struct mlx4_buf_list {
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index 1b835ca..53c5fdb 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -48,8 +48,7 @@
 	void *			(*add)	 (struct mlx4_dev *dev);
 	void			(*remove)(struct mlx4_dev *dev, void *context);
 	void			(*event) (struct mlx4_dev *dev, void *context,
-					  enum mlx4_dev_event event, int subtype,
-					  int port);
+					  enum mlx4_dev_event event, int port);
 	struct list_head	list;
 };
 
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 09a2230..a5e43fe 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -158,10 +158,12 @@
 #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
 
 enum {
-	MLX4_WQE_CTRL_NEC	= 1 << 29,
-	MLX4_WQE_CTRL_FENCE	= 1 << 6,
-	MLX4_WQE_CTRL_CQ_UPDATE	= 3 << 2,
-	MLX4_WQE_CTRL_SOLICITED	= 1 << 1,
+	MLX4_WQE_CTRL_NEC		= 1 << 29,
+	MLX4_WQE_CTRL_FENCE		= 1 << 6,
+	MLX4_WQE_CTRL_CQ_UPDATE		= 3 << 2,
+	MLX4_WQE_CTRL_SOLICITED		= 1 << 1,
+	MLX4_WQE_CTRL_IP_CSUM		= 1 << 4,
+	MLX4_WQE_CTRL_TCP_UDP_CSUM	= 1 << 5,
 };
 
 struct mlx4_wqe_ctrl_seg {
@@ -217,6 +219,11 @@
 	__be32			reservd[2];
 };
 
+struct mlx4_lso_seg {
+	__be32			mss_hdr_size;
+	__be32			header[0];
+};
+
 struct mlx4_wqe_bind_seg {
 	__be32			flags1;
 	__be32			flags2;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 6e0393a..eb560d0 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -160,14 +160,18 @@
 
 
 #ifdef __KERNEL__
-#include <linux/spinlock.h>
-#include <linux/rwsem.h>
+#include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
 
 #include <linux/dqblk_xfs.h>
 #include <linux/dqblk_v1.h>
 #include <linux/dqblk_v2.h>
 
+#include <asm/atomic.h>
+
 extern spinlock_t dq_data_lock;
 
 /* Maximal numbers of writes for quota operation (insert/delete/update)
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
new file mode 100644
index 0000000..9cae64b
--- /dev/null
+++ b/include/linux/semaphore.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Distributed under the terms of the GNU GPL, version 2
+ *
+ * Please see kernel/semaphore.c for documentation of these functions
+ */
+#ifndef __LINUX_SEMAPHORE_H
+#define __LINUX_SEMAPHORE_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/* Please don't access any members of this structure directly */
+struct semaphore {
+	spinlock_t		lock;
+	unsigned int		count;
+	struct list_head	wait_list;
+};
+
+#define __SEMAPHORE_INITIALIZER(name, n)				\
+{									\
+	.lock		= __SPIN_LOCK_UNLOCKED((name).lock),		\
+	.count		= n,						\
+	.wait_list	= LIST_HEAD_INIT((name).wait_list),		\
+}
+
+#define __DECLARE_SEMAPHORE_GENERIC(name, count) \
+	struct semaphore name = __SEMAPHORE_INITIALIZER(name, count)
+
+#define DECLARE_MUTEX(name)	__DECLARE_SEMAPHORE_GENERIC(name, 1)
+
+static inline void sema_init(struct semaphore *sem, int val)
+{
+	static struct lock_class_key __key;
+	*sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
+	lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0);
+}
+
+#define init_MUTEX(sem)		sema_init(sem, 1)
+#define init_MUTEX_LOCKED(sem)	sema_init(sem, 0)
+
+extern void down(struct semaphore *sem);
+extern int __must_check down_interruptible(struct semaphore *sem);
+extern int __must_check down_killable(struct semaphore *sem);
+extern int __must_check down_trylock(struct semaphore *sem);
+extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
+extern void up(struct semaphore *sem);
+
+#endif /* __LINUX_SEMAPHORE_H */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index b00c1c7..79d59c9 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -45,9 +45,9 @@
 struct kmem_cache_node {
 	spinlock_t list_lock;	/* Protect partial list and nr_partial */
 	unsigned long nr_partial;
-	atomic_long_t nr_slabs;
 	struct list_head partial;
 #ifdef CONFIG_SLUB_DEBUG
+	atomic_long_t nr_slabs;
 	struct list_head full;
 #endif
 };
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index 64a721f..8d65bf0 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -533,7 +533,10 @@
 	__u32 num_sge;
 	__u32 opcode;
 	__u32 send_flags;
-	__u32 imm_data;
+	union {
+		__u32 imm_data;
+		__u32 invalidate_rkey;
+	} ex;
 	union {
 		struct {
 			__u64 remote_addr;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 701e7b4..95bf4ba 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -94,7 +94,7 @@
 	IB_DEVICE_SRQ_RESIZE		= (1<<13),
 	IB_DEVICE_N_NOTIFY_CQ		= (1<<14),
 	IB_DEVICE_ZERO_STAG		= (1<<15),
-	IB_DEVICE_SEND_W_INV		= (1<<16),
+	IB_DEVICE_RESERVED		= (1<<16), /* old SEND_W_INV */
 	IB_DEVICE_MEM_WINDOW		= (1<<17),
 	/*
 	 * Devices should set IB_DEVICE_UD_IP_SUM if they support
@@ -104,6 +104,8 @@
 	 * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
 	 */
 	IB_DEVICE_UD_IP_CSUM		= (1<<18),
+	IB_DEVICE_UD_TSO		= (1<<19),
+	IB_DEVICE_SEND_W_INV		= (1<<21),
 };
 
 enum ib_atomic_cap {
@@ -411,6 +413,7 @@
 	IB_WC_COMP_SWAP,
 	IB_WC_FETCH_ADD,
 	IB_WC_BIND_MW,
+	IB_WC_LSO,
 /*
  * Set value of IB_WC_RECV so consumers can test if a completion is a
  * receive by testing (opcode & IB_WC_RECV).
@@ -495,6 +498,10 @@
 	IB_QPT_RAW_ETY
 };
 
+enum ib_qp_create_flags {
+	IB_QP_CREATE_IPOIB_UD_LSO	= 1 << 0,
+};
+
 struct ib_qp_init_attr {
 	void                  (*event_handler)(struct ib_event *, void *);
 	void		       *qp_context;
@@ -504,6 +511,7 @@
 	struct ib_qp_cap	cap;
 	enum ib_sig_type	sq_sig_type;
 	enum ib_qp_type		qp_type;
+	enum ib_qp_create_flags	create_flags;
 	u8			port_num; /* special QP types only */
 };
 
@@ -617,7 +625,9 @@
 	IB_WR_SEND_WITH_IMM,
 	IB_WR_RDMA_READ,
 	IB_WR_ATOMIC_CMP_AND_SWP,
-	IB_WR_ATOMIC_FETCH_AND_ADD
+	IB_WR_ATOMIC_FETCH_AND_ADD,
+	IB_WR_LSO,
+	IB_WR_SEND_WITH_INV,
 };
 
 enum ib_send_flags {
@@ -641,7 +651,10 @@
 	int			num_sge;
 	enum ib_wr_opcode	opcode;
 	int			send_flags;
-	__be32			imm_data;
+	union {
+		__be32		imm_data;
+		u32		invalidate_rkey;
+	} ex;
 	union {
 		struct {
 			u64	remote_addr;
@@ -655,6 +668,9 @@
 		} atomic;
 		struct {
 			struct ib_ah *ah;
+			void   *header;
+			int     hlen;
+			int     mss;
 			u32	remote_qpn;
 			u32	remote_qkey;
 			u16	pkey_index; /* valid for GSI only */
@@ -730,7 +746,7 @@
 	struct ib_ucontext     *context;	/* associated user context */
 	void		       *object;		/* containing object */
 	struct list_head	list;		/* link to context's list */
-	u32			id;		/* index into kernel idr */
+	int			id;		/* index into kernel idr */
 	struct kref		ref;
 	struct rw_semaphore	mutex;		/* protects .live */
 	int			live;
@@ -971,6 +987,8 @@
 						int comp_vector,
 						struct ib_ucontext *context,
 						struct ib_udata *udata);
+	int                        (*modify_cq)(struct ib_cq *cq, u16 cq_count,
+						u16 cq_period);
 	int                        (*destroy_cq)(struct ib_cq *cq);
 	int                        (*resize_cq)(struct ib_cq *cq, int cqe,
 						struct ib_udata *udata);
@@ -1376,6 +1394,15 @@
 int ib_resize_cq(struct ib_cq *cq, int cqe);
 
 /**
+ * ib_modify_cq - Modifies moderation params of the CQ
+ * @cq: The CQ to modify.
+ * @cq_count: number of CQEs that will trigger an event
+ * @cq_period: max period of time in usec before triggering an event
+ *
+ */
+int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+
+/**
  * ib_destroy_cq - Destroys the specified CQ.
  * @cq: The CQ to destroy.
  */
diff --git a/init/Kconfig b/init/Kconfig
index a97924b..7fccf09 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -763,7 +763,7 @@
 config SLABINFO
 	bool
 	depends on PROC_FS
-	depends on SLAB || SLUB
+	depends on SLAB || SLUB_DEBUG
 	default y
 
 config RT_MUTEXES
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c584c5..f45c69e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o nsproxy.o srcu.o \
+	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_check.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2727f92..6d8de05 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1722,7 +1722,12 @@
 	use_task_css_set_links = 1;
 	do_each_thread(g, p) {
 		task_lock(p);
-		if (list_empty(&p->cg_list))
+		/*
+		 * We should check if the process is exiting, otherwise
+		 * it will race with cgroup_exit() in that the list
+		 * entry won't be deleted though the process has exited.
+		 */
+		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
 			list_add(&p->cg_list, &p->cgroups->tasks);
 		task_unlock(p);
 	} while_each_thread(g, p);
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
new file mode 100644
index 0000000..5c2942e
--- /dev/null
+++ b/kernel/semaphore.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Distributed under the terms of the GNU GPL, version 2
+ *
+ * This file implements counting semaphores.
+ * A counting semaphore may be acquired 'n' times before sleeping.
+ * See mutex.c for single-acquisition sleeping locks which enforce
+ * rules which allow code to be debugged more easily.
+ */
+
+/*
+ * Some notes on the implementation:
+ *
+ * The spinlock controls access to the other members of the semaphore.
+ * down_trylock() and up() can be called from interrupt context, so we
+ * have to disable interrupts when taking the lock.  It turns out various
+ * parts of the kernel expect to be able to use down() on a semaphore in
+ * interrupt context when they know it will succeed, so we have to use
+ * irqsave variants for down(), down_interruptible() and down_killable()
+ * too.
+ *
+ * The ->count variable represents how many more tasks can acquire this
+ * semaphore.  If it's zero, there may be tasks waiting on the wait_list.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+
+static noinline void __down(struct semaphore *sem);
+static noinline int __down_interruptible(struct semaphore *sem);
+static noinline int __down_killable(struct semaphore *sem);
+static noinline int __down_timeout(struct semaphore *sem, long jiffies);
+static noinline void __up(struct semaphore *sem);
+
+/**
+ * down - acquire the semaphore
+ * @sem: the semaphore to be acquired
+ *
+ * Acquires the semaphore.  If no more tasks are allowed to acquire the
+ * semaphore, calling this function will put the task to sleep until the
+ * semaphore is released.
+ *
+ * Use of this function is deprecated, please use down_interruptible() or
+ * down_killable() instead.
+ */
+void down(struct semaphore *sem)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (likely(sem->count > 0))
+		sem->count--;
+	else
+		__down(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(down);
+
+/**
+ * down_interruptible - acquire the semaphore unless interrupted
+ * @sem: the semaphore to be acquired
+ *
+ * Attempts to acquire the semaphore.  If no more tasks are allowed to
+ * acquire the semaphore, calling this function will put the task to sleep.
+ * If the sleep is interrupted by a signal, this function will return -EINTR.
+ * If the semaphore is successfully acquired, this function returns 0.
+ */
+int down_interruptible(struct semaphore *sem)
+{
+	unsigned long flags;
+	int result = 0;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (likely(sem->count > 0))
+		sem->count--;
+	else
+		result = __down_interruptible(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(down_interruptible);
+
+/**
+ * down_killable - acquire the semaphore unless killed
+ * @sem: the semaphore to be acquired
+ *
+ * Attempts to acquire the semaphore.  If no more tasks are allowed to
+ * acquire the semaphore, calling this function will put the task to sleep.
+ * If the sleep is interrupted by a fatal signal, this function will return
+ * -EINTR.  If the semaphore is successfully acquired, this function returns
+ * 0.
+ */
+int down_killable(struct semaphore *sem)
+{
+	unsigned long flags;
+	int result = 0;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (likely(sem->count > 0))
+		sem->count--;
+	else
+		result = __down_killable(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(down_killable);
+
+/**
+ * down_trylock - try to acquire the semaphore, without waiting
+ * @sem: the semaphore to be acquired
+ *
+ * Try to acquire the semaphore atomically.  Returns 0 if the mutex has
+ * been acquired successfully or 1 if it it cannot be acquired.
+ *
+ * NOTE: This return value is inverted from both spin_trylock and
+ * mutex_trylock!  Be careful about this when converting code.
+ *
+ * Unlike mutex_trylock, this function can be used from interrupt context,
+ * and the semaphore can be released by any task or interrupt.
+ */
+int down_trylock(struct semaphore *sem)
+{
+	unsigned long flags;
+	int count;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	count = sem->count - 1;
+	if (likely(count >= 0))
+		sem->count = count;
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return (count < 0);
+}
+EXPORT_SYMBOL(down_trylock);
+
+/**
+ * down_timeout - acquire the semaphore within a specified time
+ * @sem: the semaphore to be acquired
+ * @jiffies: how long to wait before failing
+ *
+ * Attempts to acquire the semaphore.  If no more tasks are allowed to
+ * acquire the semaphore, calling this function will put the task to sleep.
+ * If the semaphore is not released within the specified number of jiffies,
+ * this function returns -ETIME.  It returns 0 if the semaphore was acquired.
+ */
+int down_timeout(struct semaphore *sem, long jiffies)
+{
+	unsigned long flags;
+	int result = 0;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (likely(sem->count > 0))
+		sem->count--;
+	else
+		result = __down_timeout(sem, jiffies);
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(down_timeout);
+
+/**
+ * up - release the semaphore
+ * @sem: the semaphore to release
+ *
+ * Release the semaphore.  Unlike mutexes, up() may be called from any
+ * context and even by tasks which have never called down().
+ */
+void up(struct semaphore *sem)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (likely(list_empty(&sem->wait_list)))
+		sem->count++;
+	else
+		__up(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(up);
+
+/* Functions for the contended case */
+
+struct semaphore_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	int up;
+};
+
+/*
+ * Because this function is inlined, the 'state' parameter will be
+ * constant, and thus optimised away by the compiler.  Likewise the
+ * 'timeout' parameter for the cases without timeouts.
+ */
+static inline int __sched __down_common(struct semaphore *sem, long state,
+								long timeout)
+{
+	struct task_struct *task = current;
+	struct semaphore_waiter waiter;
+
+	list_add_tail(&waiter.list, &sem->wait_list);
+	waiter.task = task;
+	waiter.up = 0;
+
+	for (;;) {
+		if (state == TASK_INTERRUPTIBLE && signal_pending(task))
+			goto interrupted;
+		if (state == TASK_KILLABLE && fatal_signal_pending(task))
+			goto interrupted;
+		if (timeout <= 0)
+			goto timed_out;
+		__set_task_state(task, state);
+		spin_unlock_irq(&sem->lock);
+		timeout = schedule_timeout(timeout);
+		spin_lock_irq(&sem->lock);
+		if (waiter.up)
+			return 0;
+	}
+
+ timed_out:
+	list_del(&waiter.list);
+	return -ETIME;
+
+ interrupted:
+	list_del(&waiter.list);
+	return -EINTR;
+}
+
+static noinline void __sched __down(struct semaphore *sem)
+{
+	__down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+}
+
+static noinline int __sched __down_interruptible(struct semaphore *sem)
+{
+	return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+}
+
+static noinline int __sched __down_killable(struct semaphore *sem)
+{
+	return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT);
+}
+
+static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
+{
+	return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
+}
+
+static noinline void __sched __up(struct semaphore *sem)
+{
+	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
+						struct semaphore_waiter, list);
+	list_del(&waiter->list);
+	waiter->up = 1;
+	wake_up_process(waiter->task);
+}
diff --git a/kernel/signal.c b/kernel/signal.c
index 6af1210..cc8303c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1757,6 +1757,45 @@
 	return 1;
 }
 
+static int ptrace_signal(int signr, siginfo_t *info,
+			 struct pt_regs *regs, void *cookie)
+{
+	if (!(current->ptrace & PT_PTRACED))
+		return signr;
+
+	ptrace_signal_deliver(regs, cookie);
+
+	/* Let the debugger run.  */
+	ptrace_stop(signr, 0, info);
+
+	/* We're back.  Did the debugger cancel the sig?  */
+	signr = current->exit_code;
+	if (signr == 0)
+		return signr;
+
+	current->exit_code = 0;
+
+	/* Update the siginfo structure if the signal has
+	   changed.  If the debugger wanted something
+	   specific in the siginfo structure then it should
+	   have updated *info via PTRACE_SETSIGINFO.  */
+	if (signr != info->si_signo) {
+		info->si_signo = signr;
+		info->si_errno = 0;
+		info->si_code = SI_USER;
+		info->si_pid = task_pid_vnr(current->parent);
+		info->si_uid = current->parent->uid;
+	}
+
+	/* If the (new) signal is now blocked, requeue it.  */
+	if (sigismember(&current->blocked, signr)) {
+		specific_send_sig_info(signr, info, current);
+		signr = 0;
+	}
+
+	return signr;
+}
+
 int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 			  struct pt_regs *regs, void *cookie)
 {
@@ -1785,36 +1824,10 @@
 		if (!signr)
 			break; /* will return 0 */
 
-		if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
-			ptrace_signal_deliver(regs, cookie);
-
-			/* Let the debugger run.  */
-			ptrace_stop(signr, 0, info);
-
-			/* We're back.  Did the debugger cancel the sig?  */
-			signr = current->exit_code;
-			if (signr == 0)
+		if (signr != SIGKILL) {
+			signr = ptrace_signal(signr, info, regs, cookie);
+			if (!signr)
 				continue;
-
-			current->exit_code = 0;
-
-			/* Update the siginfo structure if the signal has
-			   changed.  If the debugger wanted something
-			   specific in the siginfo structure then it should
-			   have updated *info via PTRACE_SETSIGINFO.  */
-			if (signr != info->si_signo) {
-				info->si_signo = signr;
-				info->si_errno = 0;
-				info->si_code = SI_USER;
-				info->si_pid = task_pid_vnr(current->parent);
-				info->si_uid = current->parent->uid;
-			}
-
-			/* If the (new) signal is now blocked, requeue it.  */
-			if (sigismember(&current->blocked, signr)) {
-				specific_send_sig_info(signr, info, current);
-				continue;
-			}
 		}
 
 		ka = &current->sighand->action[signr-1];
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index e1bd50c..fdfa0c7 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -14,7 +14,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/hrtimer.h>
-#include <linux/irq.h>
+#include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 1bea399..4f38865 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -14,12 +14,14 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/hrtimer.h>
-#include <linux/irq.h>
+#include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
 #include <linux/tick.h>
 
+#include <asm/irq_regs.h>
+
 #include "tick-internal.h"
 
 /*
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 0258d31..450c049 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -14,7 +14,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/hrtimer.h>
-#include <linux/irq.h>
+#include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0796c1a..80db357 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -211,7 +211,7 @@
 config SLUB_STATS
 	default n
 	bool "Enable SLUB performance statistics"
-	depends on SLUB
+	depends on SLUB && SLUB_DEBUG && SYSFS
 	help
 	  SLUB statistics are useful to debug SLUBs allocation behavior in
 	  order find ways to optimize the allocator. This should never be
@@ -265,16 +265,6 @@
 	 This feature allows mutex semantics violations to be detected and
 	 reported.
 
-config DEBUG_SEMAPHORE
-	bool "Semaphore debugging"
-	depends on DEBUG_KERNEL
-	depends on ALPHA || FRV
-	default n
-	help
-	  If you say Y here then semaphore processing will issue lots of
-	  verbose debugging messages.  If you suspect a semaphore problem or a
-	  kernel hacker asks for this option then say Y.  Otherwise say N.
-
 config DEBUG_LOCK_ALLOC
 	bool "Lock debugging: detect incorrect freeing of live locks"
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
diff --git a/lib/Makefile b/lib/Makefile
index 23de261..28dba90 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -29,7 +29,6 @@
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
-lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 812dbf0..fbc11a3 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -8,6 +8,7 @@
 #include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
+#include <asm/semaphore.h>
 
 /*
  * The 'big kernel semaphore'
diff --git a/lib/semaphore-sleepers.c b/lib/semaphore-sleepers.c
deleted file mode 100644
index 0198782..0000000
--- a/lib/semaphore-sleepers.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * i386 and x86-64 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-void __sched __down(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * the wait_queue_head.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible(struct semaphore *sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * wait_queue_head. The "-1" is because we're
-		 * still hoping to get the semaphore.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-	tsk->state = TASK_RUNNING;
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-int __down_trylock(struct semaphore *sem)
-{
-	int sleepers;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock in the
-	 * wait_queue_head.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count)) {
-		wake_up_locked(&sem->wait);
-	}
-
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	return 1;
-}
diff --git a/mm/slub.c b/mm/slub.c
index acc975f..7f8aaa2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -837,6 +837,35 @@
 	spin_unlock(&n->list_lock);
 }
 
+/* Tracking of the number of slabs for debugging purposes */
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+{
+	struct kmem_cache_node *n = get_node(s, node);
+
+	return atomic_long_read(&n->nr_slabs);
+}
+
+static inline void inc_slabs_node(struct kmem_cache *s, int node)
+{
+	struct kmem_cache_node *n = get_node(s, node);
+
+	/*
+	 * May be called early in order to allocate a slab for the
+	 * kmem_cache_node structure. Solve the chicken-egg
+	 * dilemma by deferring the increment of the count during
+	 * bootstrap (see early_kmem_cache_node_alloc).
+	 */
+	if (!NUMA_BUILD || n)
+		atomic_long_inc(&n->nr_slabs);
+}
+static inline void dec_slabs_node(struct kmem_cache *s, int node)
+{
+	struct kmem_cache_node *n = get_node(s, node);
+
+	atomic_long_dec(&n->nr_slabs);
+}
+
+/* Object debug checks for alloc/free paths */
 static void setup_object_debug(struct kmem_cache *s, struct page *page,
 								void *object)
 {
@@ -1028,6 +1057,11 @@
 	return flags;
 }
 #define slub_debug 0
+
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+							{ return 0; }
+static inline void inc_slabs_node(struct kmem_cache *s, int node) {}
+static inline void dec_slabs_node(struct kmem_cache *s, int node) {}
 #endif
 /*
  * Slab allocation and freeing
@@ -1066,7 +1100,6 @@
 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
 	struct page *page;
-	struct kmem_cache_node *n;
 	void *start;
 	void *last;
 	void *p;
@@ -1078,9 +1111,7 @@
 	if (!page)
 		goto out;
 
-	n = get_node(s, page_to_nid(page));
-	if (n)
-		atomic_long_inc(&n->nr_slabs);
+	inc_slabs_node(s, page_to_nid(page));
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
 	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
@@ -1125,6 +1156,8 @@
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
 
+	__ClearPageSlab(page);
+	reset_page_mapcount(page);
 	__free_pages(page, s->order);
 }
 
@@ -1151,11 +1184,7 @@
 
 static void discard_slab(struct kmem_cache *s, struct page *page)
 {
-	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
-	atomic_long_dec(&n->nr_slabs);
-	reset_page_mapcount(page);
-	__ClearPageSlab(page);
+	dec_slabs_node(s, page_to_nid(page));
 	free_slab(s, page);
 }
 
@@ -1886,15 +1915,18 @@
 	c->node = 0;
 	c->offset = s->offset / sizeof(void *);
 	c->objsize = s->objsize;
+#ifdef CONFIG_SLUB_STATS
+	memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
+#endif
 }
 
 static void init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	n->nr_partial = 0;
-	atomic_long_set(&n->nr_slabs, 0);
 	spin_lock_init(&n->list_lock);
 	INIT_LIST_HEAD(&n->partial);
 #ifdef CONFIG_SLUB_DEBUG
+	atomic_long_set(&n->nr_slabs, 0);
 	INIT_LIST_HEAD(&n->full);
 #endif
 }
@@ -2063,7 +2095,7 @@
 	init_tracking(kmalloc_caches, n);
 #endif
 	init_kmem_cache_node(n);
-	atomic_long_inc(&n->nr_slabs);
+	inc_slabs_node(kmalloc_caches, node);
 
 	/*
 	 * lockdep requires consistent irq usage for each lock
@@ -2376,7 +2408,7 @@
 		struct kmem_cache_node *n = get_node(s, node);
 
 		n->nr_partial -= free_list(s, n, &n->partial);
-		if (atomic_long_read(&n->nr_slabs))
+		if (slabs_node(s, node))
 			return 1;
 	}
 	free_kmem_cache_nodes(s);
@@ -2409,10 +2441,6 @@
 struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
-#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
-#endif
-
 static int __init setup_slub_min_order(char *str)
 {
 	get_option(&str, &slub_min_order);
@@ -2472,6 +2500,7 @@
 }
 
 #ifdef CONFIG_ZONE_DMA
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 
 static void sysfs_add_func(struct work_struct *w)
 {
@@ -2688,21 +2717,6 @@
 }
 EXPORT_SYMBOL(kfree);
 
-#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SLABINFO)
-static unsigned long count_partial(struct kmem_cache_node *n)
-{
-	unsigned long flags;
-	unsigned long x = 0;
-	struct page *page;
-
-	spin_lock_irqsave(&n->list_lock, flags);
-	list_for_each_entry(page, &n->partial, lru)
-		x += page->inuse;
-	spin_unlock_irqrestore(&n->list_lock, flags);
-	return x;
-}
-#endif
-
 /*
  * kmem_cache_shrink removes empty slabs from the partial lists and sorts
  * the remaining slabs by the number of items in use. The slabs with the
@@ -2816,7 +2830,7 @@
 			 * and offline_pages() function shoudn't call this
 			 * callback. So, we must fail.
 			 */
-			BUG_ON(atomic_long_read(&n->nr_slabs));
+			BUG_ON(slabs_node(s, offline_node));
 
 			s->node[offline_node] = NULL;
 			kmem_cache_free(kmalloc_caches, n);
@@ -3181,6 +3195,21 @@
 	return slab_alloc(s, gfpflags, node, caller);
 }
 
+#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
+static unsigned long count_partial(struct kmem_cache_node *n)
+{
+	unsigned long flags;
+	unsigned long x = 0;
+	struct page *page;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+	list_for_each_entry(page, &n->partial, lru)
+		x += page->inuse;
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	return x;
+}
+#endif
+
 #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
 static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
@@ -3979,10 +4008,12 @@
 
 	len = sprintf(buf, "%lu", sum);
 
+#ifdef CONFIG_SMP
 	for_each_online_cpu(cpu) {
 		if (data[cpu] && len < PAGE_SIZE - 20)
-			len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
+			len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
 	}
+#endif
 	kfree(data);
 	return len + sprintf(buf + len, "\n");
 }
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index ffbf22a..8ea283e 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1573,7 +1573,6 @@
 	send_wr.sg_list = req->rl_send_iov;
 	send_wr.num_sge = req->rl_niovs;
 	send_wr.opcode = IB_WR_SEND;
-	send_wr.imm_data = 0;
 	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
 		ib_dma_sync_single_for_device(ia->ri_id->device,
 			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
diff --git a/security/Kconfig b/security/Kconfig
index 5dfc206..49b51f9 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -113,10 +113,12 @@
 	  from userspace allocation.  Keeping a user from writing to low pages
 	  can help reduce the impact of kernel NULL pointer bugs.
 
-	  For most users with lots of address space a value of 65536 is
-	  reasonable and should cause no problems.  Programs which use vm86
-	  functionality would either need additional permissions from either
-	  the LSM or the capabilities module or have this protection disabled.
+	  For most ia64, ppc64 and x86 users with lots of address space
+	  a value of 65536 is reasonable and should cause no problems.
+	  On arm and other archs it should not be higher than 32768.
+	  Programs which use vm86 functionality would either need additional
+	  permissions from either the LSM or the capabilities module or have
+	  this protection disabled.
 
 	  This value can be changed after boot using the
 	  /proc/sys/vm/mmap_min_addr tunable.
diff --git a/security/commoncap.c b/security/commoncap.c
index 06d5c94..8529057 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -267,7 +267,7 @@
 	rc = cap_from_disk(&vcaps, bprm, rc);
 	if (rc)
 		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
-			__FUNCTION__, rc, bprm->filename);
+			__func__, rc, bprm->filename);
 
 out:
 	dput(dentry);
@@ -302,7 +302,7 @@
 	ret = get_file_caps(bprm);
 	if (ret)
 		printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n",
-			__FUNCTION__, ret, bprm->filename);
+			__func__, ret, bprm->filename);
 
 	/*  To support inheritance of root-permissions and suid-root
 	 *  executables under compatibility mode, we raise all three
diff --git a/security/keys/internal.h b/security/keys/internal.h
index d36d693..7d894ef 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -22,16 +22,16 @@
 
 #ifdef __KDEBUG
 #define kenter(FMT, ...) \
-	printk(KERN_DEBUG "==> %s("FMT")\n", __FUNCTION__, ##__VA_ARGS__)
+	printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
 #define kleave(FMT, ...) \
-	printk(KERN_DEBUG "<== %s()"FMT"\n", __FUNCTION__, ##__VA_ARGS__)
+	printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
 #define kdebug(FMT, ...) \
 	printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__)
 #else
 #define kenter(FMT, ...) \
-	no_printk(KERN_DEBUG "==> %s("FMT")\n", __FUNCTION__, ##__VA_ARGS__)
+	no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
 #define kleave(FMT, ...) \
-	no_printk(KERN_DEBUG "<== %s()"FMT"\n", __FUNCTION__, ##__VA_ARGS__)
+	no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
 #define kdebug(FMT, ...) \
 	no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
 #endif
diff --git a/security/root_plug.c b/security/root_plug.c
index 870f130..6112d14 100644
--- a/security/root_plug.c
+++ b/security/root_plug.c
@@ -49,7 +49,7 @@
 	do {							\
 		if (debug)					\
 			printk(KERN_DEBUG "%s: %s: " fmt ,	\
-				MY_NAME , __FUNCTION__ , 	\
+				MY_NAME , __func__ , 	\
 				## arg);			\
 	} while (0)
 
diff --git a/security/security.c b/security/security.c
index b1387a6..9beecac 100644
--- a/security/security.c
+++ b/security/security.c
@@ -57,7 +57,7 @@
 
 	if (verify(&dummy_security_ops)) {
 		printk(KERN_ERR "%s could not verify "
-		       "dummy_security_ops structure.\n", __FUNCTION__);
+		       "dummy_security_ops structure.\n", __func__);
 		return -EIO;
 	}
 
@@ -82,7 +82,7 @@
 {
 	if (verify(ops)) {
 		printk(KERN_DEBUG "%s could not verify "
-		       "security_operations structure.\n", __FUNCTION__);
+		       "security_operations structure.\n", __func__);
 		return -EINVAL;
 	}
 
@@ -110,13 +110,13 @@
 {
 	if (verify(ops)) {
 		printk(KERN_INFO "%s could not verify "
-		       "security operations.\n", __FUNCTION__);
+		       "security operations.\n", __func__);
 		return -EINVAL;
 	}
 
 	if (ops == security_ops) {
 		printk(KERN_INFO "%s security operations "
-		       "already registered.\n", __FUNCTION__);
+		       "already registered.\n", __func__);
 		return -EINVAL;
 	}
 
diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig
index 2b517d6..a436d1c 100644
--- a/security/selinux/Kconfig
+++ b/security/selinux/Kconfig
@@ -145,7 +145,7 @@
 config SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
 	int "NSA SELinux maximum supported policy format version value"
 	depends on SECURITY_SELINUX_POLICYDB_VERSION_MAX
-	range 15 22
+	range 15 23
 	default 19
 	help
 	  This option sets the value for the maximum policy format version
diff --git a/security/selinux/Makefile b/security/selinux/Makefile
index 00afd85..d47fc5e 100644
--- a/security/selinux/Makefile
+++ b/security/selinux/Makefile
@@ -11,6 +11,7 @@
 	     nlmsgtab.o \
 	     netif.o \
 	     netnode.o \
+	     netport.o \
 	     exports.o
 
 selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 187964e..a4fc6e6 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -871,6 +871,8 @@
 	int rc = 0;
 	u32 denied;
 
+	BUG_ON(!requested);
+
 	rcu_read_lock();
 
 	node = avc_lookup(ssid, tsid, tclass, requested);
@@ -890,13 +892,14 @@
 
 	denied = requested & ~(p_ae->avd.allowed);
 
-	if (!requested || denied) {
-		if (selinux_enforcing || (flags & AVC_STRICT))
+	if (denied) {
+		if (flags & AVC_STRICT)
 			rc = -EACCES;
+		else if (!selinux_enforcing || security_permissive_sid(ssid))
+			avc_update_node(AVC_CALLBACK_GRANT, requested, ssid,
+					tsid, tclass);
 		else
-			if (node)
-				avc_update_node(AVC_CALLBACK_GRANT,requested,
-						ssid,tsid,tclass);
+			rc = -EACCES;
 	}
 
 	rcu_read_unlock();
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d39b59c..34f2d46 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -80,6 +80,7 @@
 #include "objsec.h"
 #include "netif.h"
 #include "netnode.h"
+#include "netport.h"
 #include "xfrm.h"
 #include "netlabel.h"
 
@@ -161,8 +162,7 @@
 	if (!tsec)
 		return -ENOMEM;
 
-	tsec->task = task;
-	tsec->osid = tsec->sid = tsec->ptrace_sid = SECINITSID_UNLABELED;
+	tsec->osid = tsec->sid = SECINITSID_UNLABELED;
 	task->security = tsec;
 
 	return 0;
@@ -218,7 +218,6 @@
 	if (!fsec)
 		return -ENOMEM;
 
-	fsec->file = file;
 	fsec->sid = tsec->sid;
 	fsec->fown_sid = tsec->sid;
 	file->f_security = fsec;
@@ -275,12 +274,11 @@
 	if (!ssec)
 		return -ENOMEM;
 
-	ssec->sk = sk;
 	ssec->peer_sid = SECINITSID_UNLABELED;
 	ssec->sid = SECINITSID_UNLABELED;
 	sk->sk_security = ssec;
 
-	selinux_netlbl_sk_security_init(ssec, family);
+	selinux_netlbl_sk_security_reset(ssec, family);
 
 	return 0;
 }
@@ -324,10 +322,10 @@
 };
 
 static match_table_t tokens = {
-	{Opt_context, "context=%s"},
-	{Opt_fscontext, "fscontext=%s"},
-	{Opt_defcontext, "defcontext=%s"},
-	{Opt_rootcontext, "rootcontext=%s"},
+	{Opt_context, CONTEXT_STR "%s"},
+	{Opt_fscontext, FSCONTEXT_STR "%s"},
+	{Opt_defcontext, DEFCONTEXT_STR "%s"},
+	{Opt_rootcontext, ROOTCONTEXT_STR "%s"},
 	{Opt_error, NULL},
 };
 
@@ -671,7 +669,7 @@
 	rc = security_fs_use(sb->s_type->name, &sbsec->behavior, &sbsec->sid);
 	if (rc) {
 		printk(KERN_WARNING "%s: security_fs_use(%s) returned %d\n",
-		       __FUNCTION__, sb->s_type->name, rc);
+		       __func__, sb->s_type->name, rc);
 		goto out;
 	}
 
@@ -1137,7 +1135,7 @@
 		}
 		if (!dentry) {
 			printk(KERN_WARNING "%s:  no dentry for dev=%s "
-			       "ino=%ld\n", __FUNCTION__, inode->i_sb->s_id,
+			       "ino=%ld\n", __func__, inode->i_sb->s_id,
 			       inode->i_ino);
 			goto out_unlock;
 		}
@@ -1175,7 +1173,7 @@
 		if (rc < 0) {
 			if (rc != -ENODATA) {
 				printk(KERN_WARNING "%s:  getxattr returned "
-				       "%d for dev=%s ino=%ld\n", __FUNCTION__,
+				       "%d for dev=%s ino=%ld\n", __func__,
 				       -rc, inode->i_sb->s_id, inode->i_ino);
 				kfree(context);
 				goto out_unlock;
@@ -1190,7 +1188,7 @@
 			if (rc) {
 				printk(KERN_WARNING "%s:  context_to_sid(%s) "
 				       "returned %d for dev=%s ino=%ld\n",
-				       __FUNCTION__, context, -rc,
+				       __func__, context, -rc,
 				       inode->i_sb->s_id, inode->i_ino);
 				kfree(context);
 				/* Leave with the unlabeled SID */
@@ -1618,6 +1616,35 @@
 	return av;
 }
 
+/*
+ * Convert a file mask to an access vector and include the correct open
+ * open permission.
+ */
+static inline u32 open_file_mask_to_av(int mode, int mask)
+{
+	u32 av = file_mask_to_av(mode, mask);
+
+	if (selinux_policycap_openperm) {
+		/*
+		 * lnk files and socks do not really have an 'open'
+		 */
+		if (S_ISREG(mode))
+			av |= FILE__OPEN;
+		else if (S_ISCHR(mode))
+			av |= CHR_FILE__OPEN;
+		else if (S_ISBLK(mode))
+			av |= BLK_FILE__OPEN;
+		else if (S_ISFIFO(mode))
+			av |= FIFO_FILE__OPEN;
+		else if (S_ISDIR(mode))
+			av |= DIR__OPEN;
+		else
+			printk(KERN_ERR "SELinux: WARNING: inside open_file_to_av "
+				"with unknown mode:%x\n", mode);
+	}
+	return av;
+}
+
 /* Convert a Linux file to an access vector. */
 static inline u32 file_to_av(struct file *file)
 {
@@ -1645,19 +1672,13 @@
 
 static int selinux_ptrace(struct task_struct *parent, struct task_struct *child)
 {
-	struct task_security_struct *psec = parent->security;
-	struct task_security_struct *csec = child->security;
 	int rc;
 
 	rc = secondary_ops->ptrace(parent,child);
 	if (rc)
 		return rc;
 
-	rc = task_has_perm(parent, child, PROCESS__PTRACE);
-	/* Save the SID of the tracing process for later use in apply_creds. */
-	if (!(child->ptrace & PT_PTRACED) && !rc)
-		csec->ptrace_sid = psec->sid;
-	return rc;
+	return task_has_perm(parent, child, PROCESS__PTRACE);
 }
 
 static int selinux_capget(struct task_struct *target, kernel_cap_t *effective,
@@ -1879,6 +1900,22 @@
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
 }
 
+/**
+ * task_tracer_task - return the task that is tracing the given task
+ * @task:		task to consider
+ *
+ * Returns NULL if noone is tracing @task, or the &struct task_struct
+ * pointer to its tracer.
+ *
+ * Must be called under rcu_read_lock().
+ */
+static struct task_struct *task_tracer_task(struct task_struct *task)
+{
+	if (task->ptrace & PT_PTRACED)
+		return rcu_dereference(task->parent);
+	return NULL;
+}
+
 /* binprm security operations */
 
 static int selinux_bprm_alloc_security(struct linux_binprm *bprm)
@@ -1889,7 +1926,6 @@
 	if (!bsec)
 		return -ENOMEM;
 
-	bsec->bprm = bprm;
 	bsec->sid = SECINITSID_UNLABELED;
 	bsec->set = 0;
 
@@ -2126,12 +2162,25 @@
 		/* Check for ptracing, and update the task SID if ok.
 		   Otherwise, leave SID unchanged and kill. */
 		if (unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
-			rc = avc_has_perm(tsec->ptrace_sid, sid,
-					  SECCLASS_PROCESS, PROCESS__PTRACE,
-					  NULL);
-			if (rc) {
-				bsec->unsafe = 1;
-				return;
+			struct task_struct *tracer;
+			struct task_security_struct *sec;
+			u32 ptsid = 0;
+
+			rcu_read_lock();
+			tracer = task_tracer_task(current);
+			if (likely(tracer != NULL)) {
+				sec = tracer->security;
+				ptsid = sec->sid;
+			}
+			rcu_read_unlock();
+
+			if (ptsid != 0) {
+				rc = avc_has_perm(ptsid, sid, SECCLASS_PROCESS,
+						  PROCESS__PTRACE, NULL);
+				if (rc) {
+					bsec->unsafe = 1;
+					return;
+				}
 			}
 		}
 		tsec->sid = sid;
@@ -2239,10 +2288,10 @@
 
 static inline int selinux_option(char *option, int len)
 {
-	return (match_prefix("context=", sizeof("context=")-1, option, len) ||
-	        match_prefix("fscontext=", sizeof("fscontext=")-1, option, len) ||
-	        match_prefix("defcontext=", sizeof("defcontext=")-1, option, len) ||
-		match_prefix("rootcontext=", sizeof("rootcontext=")-1, option, len));
+	return (match_prefix(CONTEXT_STR, sizeof(CONTEXT_STR)-1, option, len) ||
+		match_prefix(FSCONTEXT_STR, sizeof(FSCONTEXT_STR)-1, option, len) ||
+		match_prefix(DEFCONTEXT_STR, sizeof(DEFCONTEXT_STR)-1, option, len) ||
+		match_prefix(ROOTCONTEXT_STR, sizeof(ROOTCONTEXT_STR)-1, option, len));
 }
 
 static inline void take_option(char **to, char *from, int *first, int len)
@@ -2412,7 +2461,7 @@
 			printk(KERN_WARNING "%s:  "
 			       "security_transition_sid failed, rc=%d (dev=%s "
 			       "ino=%ld)\n",
-			       __FUNCTION__,
+			       __func__,
 			       -rc, inode->i_sb->s_id, inode->i_ino);
 			return rc;
 		}
@@ -2536,7 +2585,7 @@
 	}
 
 	return inode_has_perm(current, inode,
-			       file_mask_to_av(inode->i_mode, mask), NULL);
+			       open_file_mask_to_av(inode->i_mode, mask), NULL);
 }
 
 static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
@@ -2646,7 +2695,7 @@
 	rc = security_context_to_sid(value, size, &newsid);
 	if (rc) {
 		printk(KERN_WARNING "%s:  unable to obtain SID for context "
-		       "%s, rc=%d\n", __FUNCTION__, (char*)value, -rc);
+		       "%s, rc=%d\n", __func__, (char *)value, -rc);
 		return;
 	}
 
@@ -3087,11 +3136,6 @@
 	tsec2->keycreate_sid = tsec1->keycreate_sid;
 	tsec2->sockcreate_sid = tsec1->sockcreate_sid;
 
-	/* Retain ptracer SID across fork, if any.
-	   This will be reset by the ptrace hook upon any
-	   subsequent ptrace_attach operations. */
-	tsec2->ptrace_sid = tsec1->ptrace_sid;
-
 	return 0;
 }
 
@@ -3627,10 +3671,8 @@
 			inet_get_local_port_range(&low, &high);
 
 			if (snum < max(PROT_SOCK, low) || snum > high) {
-				err = security_port_sid(sk->sk_family,
-							sk->sk_type,
-							sk->sk_protocol, snum,
-							&sid);
+				err = sel_netport_sid(sk->sk_protocol,
+						      snum, &sid);
 				if (err)
 					goto out;
 				AVC_AUDIT_DATA_INIT(&ad,NET);
@@ -3718,8 +3760,7 @@
 			snum = ntohs(addr6->sin6_port);
 		}
 
-		err = security_port_sid(sk->sk_family, sk->sk_type,
-					sk->sk_protocol, snum, &sid);
+		err = sel_netport_sid(sk->sk_protocol, snum, &sid);
 		if (err)
 			goto out;
 
@@ -3950,9 +3991,8 @@
 
 	if (!recv_perm)
 		return 0;
-	err = security_port_sid(sk->sk_family, sk->sk_type,
-				sk->sk_protocol, ntohs(ad->u.net.sport),
-				&port_sid);
+	err = sel_netport_sid(sk->sk_protocol,
+			      ntohs(ad->u.net.sport), &port_sid);
 	if (unlikely(err)) {
 		printk(KERN_WARNING
 		       "SELinux: failure in"
@@ -4139,7 +4179,7 @@
 	newssec->peer_sid = ssec->peer_sid;
 	newssec->sclass = ssec->sclass;
 
-	selinux_netlbl_sk_security_clone(ssec, newssec);
+	selinux_netlbl_sk_security_reset(newssec, newsk->sk_family);
 }
 
 static void selinux_sk_getsecid(struct sock *sk, u32 *secid)
@@ -4373,9 +4413,8 @@
 	if (send_perm != 0)
 		return 0;
 
-	err = security_port_sid(sk->sk_family, sk->sk_type,
-				sk->sk_protocol, ntohs(ad->u.net.dport),
-				&port_sid);
+	err = sel_netport_sid(sk->sk_protocol,
+			      ntohs(ad->u.net.dport), &port_sid);
 	if (unlikely(err)) {
 		printk(KERN_WARNING
 		       "SELinux: failure in"
@@ -4561,7 +4600,6 @@
 		return -ENOMEM;
 
 	isec->sclass = sclass;
-	isec->ipc_perm = perm;
 	isec->sid = tsec->sid;
 	perm->security = isec;
 
@@ -4583,7 +4621,6 @@
 	if (!msec)
 		return -ENOMEM;
 
-	msec->msg = msg;
 	msec->sid = SECINITSID_UNLABELED;
 	msg->security = msec;
 
@@ -4994,14 +5031,14 @@
 {
 	if (secondary_ops != original_ops) {
 		printk(KERN_ERR "%s:  There is already a secondary security "
-		       "module registered.\n", __FUNCTION__);
+		       "module registered.\n", __func__);
 		return -EINVAL;
  	}
 
 	secondary_ops = ops;
 
 	printk(KERN_INFO "%s:  Registering secondary module %s\n",
-	       __FUNCTION__,
+	       __func__,
 	       name);
 
 	return 0;
@@ -5057,6 +5094,7 @@
 			       char *name, void *value, size_t size)
 {
 	struct task_security_struct *tsec;
+	struct task_struct *tracer;
 	u32 sid = 0;
 	int error;
 	char *str = value;
@@ -5145,18 +5183,24 @@
 		/* Check for ptracing, and update the task SID if ok.
 		   Otherwise, leave SID unchanged and fail. */
 		task_lock(p);
-		if (p->ptrace & PT_PTRACED) {
-			error = avc_has_perm_noaudit(tsec->ptrace_sid, sid,
+		rcu_read_lock();
+		tracer = task_tracer_task(p);
+		if (tracer != NULL) {
+			struct task_security_struct *ptsec = tracer->security;
+			u32 ptsid = ptsec->sid;
+			rcu_read_unlock();
+			error = avc_has_perm_noaudit(ptsid, sid,
 						     SECCLASS_PROCESS,
 						     PROCESS__PTRACE, 0, &avd);
 			if (!error)
 				tsec->sid = sid;
 			task_unlock(p);
-			avc_audit(tsec->ptrace_sid, sid, SECCLASS_PROCESS,
+			avc_audit(ptsid, sid, SECCLASS_PROCESS,
 				  PROCESS__PTRACE, &avd, error, NULL);
 			if (error)
 				return error;
 		} else {
+			rcu_read_unlock();
 			tsec->sid = sid;
 			task_unlock(p);
 		}
@@ -5194,7 +5238,6 @@
 	if (!ksec)
 		return -ENOMEM;
 
-	ksec->obj = k;
 	if (tsec->keycreate_sid)
 		ksec->sid = tsec->keycreate_sid;
 	else
@@ -5631,5 +5674,3 @@
 	return 0;
 }
 #endif
-
-
diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h
index d569669..1223b4f 100644
--- a/security/selinux/include/av_perm_to_string.h
+++ b/security/selinux/include/av_perm_to_string.h
@@ -14,12 +14,17 @@
    S_(SECCLASS_DIR, DIR__REPARENT, "reparent")
    S_(SECCLASS_DIR, DIR__SEARCH, "search")
    S_(SECCLASS_DIR, DIR__RMDIR, "rmdir")
+   S_(SECCLASS_DIR, DIR__OPEN, "open")
    S_(SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, "execute_no_trans")
    S_(SECCLASS_FILE, FILE__ENTRYPOINT, "entrypoint")
    S_(SECCLASS_FILE, FILE__EXECMOD, "execmod")
+   S_(SECCLASS_FILE, FILE__OPEN, "open")
    S_(SECCLASS_CHR_FILE, CHR_FILE__EXECUTE_NO_TRANS, "execute_no_trans")
    S_(SECCLASS_CHR_FILE, CHR_FILE__ENTRYPOINT, "entrypoint")
    S_(SECCLASS_CHR_FILE, CHR_FILE__EXECMOD, "execmod")
+   S_(SECCLASS_CHR_FILE, CHR_FILE__OPEN, "open")
+   S_(SECCLASS_BLK_FILE, BLK_FILE__OPEN, "open")
+   S_(SECCLASS_FIFO_FILE, FIFO_FILE__OPEN, "open")
    S_(SECCLASS_FD, FD__USE, "use")
    S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__CONNECTTO, "connectto")
    S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__NEWCONN, "newconn")
diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h
index 75b4131..c4c5116 100644
--- a/security/selinux/include/av_permissions.h
+++ b/security/selinux/include/av_permissions.h
@@ -79,6 +79,7 @@
 #define DIR__REPARENT                             0x00080000UL
 #define DIR__SEARCH                               0x00100000UL
 #define DIR__RMDIR                                0x00200000UL
+#define DIR__OPEN                                 0x00400000UL
 #define FILE__IOCTL                               0x00000001UL
 #define FILE__READ                                0x00000002UL
 #define FILE__WRITE                               0x00000004UL
@@ -99,6 +100,7 @@
 #define FILE__EXECUTE_NO_TRANS                    0x00020000UL
 #define FILE__ENTRYPOINT                          0x00040000UL
 #define FILE__EXECMOD                             0x00080000UL
+#define FILE__OPEN                                0x00100000UL
 #define LNK_FILE__IOCTL                           0x00000001UL
 #define LNK_FILE__READ                            0x00000002UL
 #define LNK_FILE__WRITE                           0x00000004UL
@@ -136,6 +138,7 @@
 #define CHR_FILE__EXECUTE_NO_TRANS                0x00020000UL
 #define CHR_FILE__ENTRYPOINT                      0x00040000UL
 #define CHR_FILE__EXECMOD                         0x00080000UL
+#define CHR_FILE__OPEN                            0x00100000UL
 #define BLK_FILE__IOCTL                           0x00000001UL
 #define BLK_FILE__READ                            0x00000002UL
 #define BLK_FILE__WRITE                           0x00000004UL
@@ -153,6 +156,7 @@
 #define BLK_FILE__SWAPON                          0x00004000UL
 #define BLK_FILE__QUOTAON                         0x00008000UL
 #define BLK_FILE__MOUNTON                         0x00010000UL
+#define BLK_FILE__OPEN                            0x00020000UL
 #define SOCK_FILE__IOCTL                          0x00000001UL
 #define SOCK_FILE__READ                           0x00000002UL
 #define SOCK_FILE__WRITE                          0x00000004UL
@@ -187,6 +191,7 @@
 #define FIFO_FILE__SWAPON                         0x00004000UL
 #define FIFO_FILE__QUOTAON                        0x00008000UL
 #define FIFO_FILE__MOUNTON                        0x00010000UL
+#define FIFO_FILE__OPEN                           0x00020000UL
 #define FD__USE                                   0x00000001UL
 #define SOCKET__IOCTL                             0x00000001UL
 #define SOCKET__READ                              0x00000002UL
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index 00a2809..9a9e7cd 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -41,10 +41,6 @@
 
 void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec,
 				      int family);
-void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec,
-				     int family);
-void selinux_netlbl_sk_security_clone(struct sk_security_struct *ssec,
-				      struct sk_security_struct *newssec);
 
 int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 				 u16 family,
@@ -73,18 +69,6 @@
 {
 	return;
 }
-static inline void selinux_netlbl_sk_security_init(
-	                                       struct sk_security_struct *ssec,
-					       int family)
-{
-	return;
-}
-static inline void selinux_netlbl_sk_security_clone(
-	                                    struct sk_security_struct *ssec,
-					    struct sk_security_struct *newssec)
-{
-	return;
-}
 
 static inline int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 					       u16 family,
diff --git a/security/selinux/include/netport.h b/security/selinux/include/netport.h
new file mode 100644
index 0000000..8991752
--- /dev/null
+++ b/security/selinux/include/netport.h
@@ -0,0 +1,31 @@
+/*
+ * Network port table
+ *
+ * SELinux must keep a mapping of network ports to labels/SIDs.  This
+ * mapping is maintained as part of the normal policy but a fast cache is
+ * needed to reduce the lookup overhead.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _SELINUX_NETPORT_H
+#define _SELINUX_NETPORT_H
+
+int sel_netport_sid(u8 protocol, u16 pnum, u32 *sid);
+
+#endif
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index c6c2bb4..300b61b 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -28,14 +28,12 @@
 #include "avc.h"
 
 struct task_security_struct {
-	struct task_struct *task;      /* back pointer to task object */
 	u32 osid;            /* SID prior to last execve */
 	u32 sid;             /* current SID */
 	u32 exec_sid;        /* exec SID */
 	u32 create_sid;      /* fscreate SID */
 	u32 keycreate_sid;   /* keycreate SID */
 	u32 sockcreate_sid;  /* fscreate SID */
-	u32 ptrace_sid;      /* SID of ptrace parent */
 };
 
 struct inode_security_struct {
@@ -50,7 +48,6 @@
 };
 
 struct file_security_struct {
-	struct file *file;              /* back pointer to file object */
 	u32 sid;              /* SID of open file description */
 	u32 fown_sid;         /* SID of file owner (for SIGIO) */
 	u32 isid;             /* SID of inode at the time of file open */
@@ -73,18 +70,15 @@
 };
 
 struct msg_security_struct {
-	struct msg_msg *msg;		/* back pointer */
 	u32 sid;              /* SID of message */
 };
 
 struct ipc_security_struct {
-	struct kern_ipc_perm *ipc_perm; /* back pointer */
 	u16 sclass;	/* security class of this object */
 	u32 sid;              /* SID of IPC resource */
 };
 
 struct bprm_security_struct {
-	struct linux_binprm *bprm;     /* back pointer to bprm object */
 	u32 sid;                       /* SID for transformed process */
 	unsigned char set;
 
@@ -109,8 +103,13 @@
 	u16 family;			/* address family */
 };
 
+struct netport_security_struct {
+	u32 sid;			/* SID for this node */
+	u16 port;			/* port number */
+	u8 protocol;			/* transport protocol */
+};
+
 struct sk_security_struct {
-	struct sock *sk;		/* back pointer to sk object */
 	u32 sid;			/* SID of this object */
 	u32 peer_sid;			/* SID of peer */
 	u16 sclass;			/* sock security class */
@@ -120,12 +119,10 @@
 		NLBL_REQUIRE,
 		NLBL_LABELED,
 	} nlbl_state;
-	spinlock_t nlbl_lock;		/* protects nlbl_state */
 #endif
 };
 
 struct key_security_struct {
-	struct key *obj; /* back pointer */
 	u32 sid;         /* SID of key */
 };
 
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 44e12ec..1904c46 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -26,13 +26,14 @@
 #define POLICYDB_VERSION_AVTAB		20
 #define POLICYDB_VERSION_RANGETRANS	21
 #define POLICYDB_VERSION_POLCAP		22
+#define POLICYDB_VERSION_PERMISSIVE	23
 
 /* Range of policy versions we understand*/
 #define POLICYDB_VERSION_MIN   POLICYDB_VERSION_BASE
 #ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
 #define POLICYDB_VERSION_MAX	CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
 #else
-#define POLICYDB_VERSION_MAX	POLICYDB_VERSION_POLCAP
+#define POLICYDB_VERSION_MAX	POLICYDB_VERSION_PERMISSIVE
 #endif
 
 #define CONTEXT_MNT	0x01
@@ -40,6 +41,11 @@
 #define ROOTCONTEXT_MNT	0x04
 #define DEFCONTEXT_MNT	0x08
 
+#define CONTEXT_STR	"context="
+#define FSCONTEXT_STR	"fscontext="
+#define ROOTCONTEXT_STR	"rootcontext="
+#define DEFCONTEXT_STR	"defcontext="
+
 struct netlbl_lsm_secattr;
 
 extern int selinux_enabled;
@@ -48,11 +54,13 @@
 /* Policy capabilities */
 enum {
 	POLICYDB_CAPABILITY_NETPEER,
+	POLICYDB_CAPABILITY_OPENPERM,
 	__POLICYDB_CAPABILITY_MAX
 };
 #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
 
 extern int selinux_policycap_netpeer;
+extern int selinux_policycap_openperm;
 
 int security_load_policy(void * data, size_t len);
 
@@ -67,6 +75,8 @@
 	u32 seqno;
 };
 
+int security_permissive_sid(u32 sid);
+
 int security_compute_av(u32 ssid, u32 tsid,
 	u16 tclass, u32 requested,
 	struct av_decision *avd);
@@ -92,8 +102,7 @@
 int security_get_user_sids(u32 callsid, char *username,
 			   u32 **sids, u32 *nel);
 
-int security_port_sid(u16 domain, u16 type, u8 protocol, u16 port,
-	u32 *out_sid);
+int security_port_sid(u8 protocol, u16 port, u32 *out_sid);
 
 int security_netif_sid(char *name, u32 *if_sid);
 
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 0fa2be4..e8ee91a 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -34,6 +34,7 @@
 
 #include "objsec.h"
 #include "security.h"
+#include "netlabel.h"
 
 /**
  * selinux_netlbl_sidlookup_cached - Cache a SID lookup
@@ -69,9 +70,7 @@
  *
  * Description:
  * Attempt to label a socket using the NetLabel mechanism using the given
- * SID.  Returns zero values on success, negative values on failure.  The
- * caller is responsibile for calling rcu_read_lock() before calling this
- * this function and rcu_read_unlock() after this function returns.
+ * SID.  Returns zero values on success, negative values on failure.
  *
  */
 static int selinux_netlbl_sock_setsid(struct sock *sk, u32 sid)
@@ -86,11 +85,8 @@
 	if (rc != 0)
 		goto sock_setsid_return;
 	rc = netlbl_sock_setattr(sk, &secattr);
-	if (rc == 0) {
-		spin_lock_bh(&sksec->nlbl_lock);
+	if (rc == 0)
 		sksec->nlbl_state = NLBL_LABELED;
-		spin_unlock_bh(&sksec->nlbl_lock);
-	}
 
 sock_setsid_return:
 	netlbl_secattr_destroy(&secattr);
@@ -129,45 +125,6 @@
 }
 
 /**
- * selinux_netlbl_sk_security_init - Setup the NetLabel fields
- * @ssec: the sk_security_struct
- * @family: the socket family
- *
- * Description:
- * Called when a new sk_security_struct is allocated to initialize the NetLabel
- * fields.
- *
- */
-void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec,
-				     int family)
-{
-	/* No locking needed, we are the only one who has access to ssec */
-	selinux_netlbl_sk_security_reset(ssec, family);
-	spin_lock_init(&ssec->nlbl_lock);
-}
-
-/**
- * selinux_netlbl_sk_security_clone - Copy the NetLabel fields
- * @ssec: the original sk_security_struct
- * @newssec: the cloned sk_security_struct
- *
- * Description:
- * Clone the NetLabel specific sk_security_struct fields from @ssec to
- * @newssec.
- *
- */
-void selinux_netlbl_sk_security_clone(struct sk_security_struct *ssec,
-				      struct sk_security_struct *newssec)
-{
-	/* We don't need to take newssec->nlbl_lock because we are the only
-	 * thread with access to newssec, but we do need to take the RCU read
-	 * lock as other threads could have access to ssec */
-	rcu_read_lock();
-	selinux_netlbl_sk_security_reset(newssec, ssec->sk->sk_family);
-	rcu_read_unlock();
-}
-
-/**
  * selinux_netlbl_skbuff_getsid - Get the sid of a packet using NetLabel
  * @skb: the packet
  * @family: protocol family
@@ -221,12 +178,8 @@
 	struct netlbl_lsm_secattr secattr;
 	u32 nlbl_peer_sid;
 
-	rcu_read_lock();
-
-	if (sksec->nlbl_state != NLBL_REQUIRE) {
-		rcu_read_unlock();
+	if (sksec->nlbl_state != NLBL_REQUIRE)
 		return;
-	}
 
 	netlbl_secattr_init(&secattr);
 	if (netlbl_sock_getattr(sk, &secattr) == 0 &&
@@ -239,8 +192,6 @@
 	 * here we will pick up the pieces in later calls to
 	 * selinux_netlbl_inode_permission(). */
 	selinux_netlbl_sock_setsid(sk, sksec->sid);
-
-	rcu_read_unlock();
 }
 
 /**
@@ -254,16 +205,13 @@
  */
 int selinux_netlbl_socket_post_create(struct socket *sock)
 {
-	int rc = 0;
 	struct sock *sk = sock->sk;
 	struct sk_security_struct *sksec = sk->sk_security;
 
-	rcu_read_lock();
-	if (sksec->nlbl_state == NLBL_REQUIRE)
-		rc = selinux_netlbl_sock_setsid(sk, sksec->sid);
-	rcu_read_unlock();
+	if (sksec->nlbl_state != NLBL_REQUIRE)
+		return 0;
 
-	return rc;
+	return selinux_netlbl_sock_setsid(sk, sksec->sid);
 }
 
 /**
@@ -288,21 +236,21 @@
 	if (!S_ISSOCK(inode->i_mode) ||
 	    ((mask & (MAY_WRITE | MAY_APPEND)) == 0))
 		return 0;
+
 	sock = SOCKET_I(inode);
 	sk = sock->sk;
 	sksec = sk->sk_security;
-
-	rcu_read_lock();
-	if (sksec->nlbl_state != NLBL_REQUIRE) {
-		rcu_read_unlock();
+	if (sksec->nlbl_state != NLBL_REQUIRE)
 		return 0;
-	}
+
 	local_bh_disable();
 	bh_lock_sock_nested(sk);
-	rc = selinux_netlbl_sock_setsid(sk, sksec->sid);
+	if (likely(sksec->nlbl_state == NLBL_REQUIRE))
+		rc = selinux_netlbl_sock_setsid(sk, sksec->sid);
+	else
+		rc = 0;
 	bh_unlock_sock(sk);
 	local_bh_enable();
-	rcu_read_unlock();
 
 	return rc;
 }
@@ -385,7 +333,6 @@
 	struct sk_security_struct *sksec = sk->sk_security;
 	struct netlbl_lsm_secattr secattr;
 
-	rcu_read_lock();
 	if (level == IPPROTO_IP && optname == IP_OPTIONS &&
 	    sksec->nlbl_state == NLBL_LABELED) {
 		netlbl_secattr_init(&secattr);
@@ -396,7 +343,6 @@
 			rc = -EACCES;
 		netlbl_secattr_destroy(&secattr);
 	}
-	rcu_read_unlock();
 
 	return rc;
 }
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c
index b59871d..6214a7a 100644
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -89,7 +89,7 @@
 nlmsg_failure:
 	kfree_skb(skb);
 oom:
-	printk(KERN_ERR "SELinux:  OOM in %s\n", __FUNCTION__);
+	printk(KERN_ERR "SELinux:  OOM in %s\n", __func__);
 	goto out;
 }
 
diff --git a/security/selinux/netport.c b/security/selinux/netport.c
new file mode 100644
index 0000000..68ede3c
--- /dev/null
+++ b/security/selinux/netport.c
@@ -0,0 +1,286 @@
+/*
+ * Network port table
+ *
+ * SELinux must keep a mapping of network ports to labels/SIDs.  This
+ * mapping is maintained as part of the normal policy but a fast cache is
+ * needed to reduce the lookup overhead.
+ *
+ * Author: Paul Moore <paul.moore@hp.com>
+ *
+ * This code is heavily based on the "netif" concept originally developed by
+ * James Morris <jmorris@redhat.com>
+ *   (see security/selinux/netif.c for more information)
+ *
+ */
+
+/*
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2008
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <asm/bug.h>
+
+#include "netport.h"
+#include "objsec.h"
+
+#define SEL_NETPORT_HASH_SIZE       256
+#define SEL_NETPORT_HASH_BKT_LIMIT   16
+
+struct sel_netport_bkt {
+	int size;
+	struct list_head list;
+};
+
+struct sel_netport {
+	struct netport_security_struct psec;
+
+	struct list_head list;
+	struct rcu_head rcu;
+};
+
+/* NOTE: we are using a combined hash table for both IPv4 and IPv6, the reason
+ * for this is that I suspect most users will not make heavy use of both
+ * address families at the same time so one table will usually end up wasted,
+ * if this becomes a problem we can always add a hash table for each address
+ * family later */
+
+static LIST_HEAD(sel_netport_list);
+static DEFINE_SPINLOCK(sel_netport_lock);
+static struct sel_netport_bkt sel_netport_hash[SEL_NETPORT_HASH_SIZE];
+
+/**
+ * sel_netport_free - Frees a port entry
+ * @p: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that memory allocated to a hash table port entry can be
+ * released safely.
+ *
+ */
+static void sel_netport_free(struct rcu_head *p)
+{
+	struct sel_netport *port = container_of(p, struct sel_netport, rcu);
+	kfree(port);
+}
+
+/**
+ * sel_netport_hashfn - Hashing function for the port table
+ * @pnum: port number
+ *
+ * Description:
+ * This is the hashing function for the port table, it returns the bucket
+ * number for the given port.
+ *
+ */
+static unsigned int sel_netport_hashfn(u16 pnum)
+{
+	return (pnum & (SEL_NETPORT_HASH_SIZE - 1));
+}
+
+/**
+ * sel_netport_find - Search for a port record
+ * @protocol: protocol
+ * @port: pnum
+ *
+ * Description:
+ * Search the network port table and return the matching record.  If an entry
+ * can not be found in the table return NULL.
+ *
+ */
+static struct sel_netport *sel_netport_find(u8 protocol, u16 pnum)
+{
+	unsigned int idx;
+	struct sel_netport *port;
+
+	idx = sel_netport_hashfn(pnum);
+	list_for_each_entry_rcu(port, &sel_netport_hash[idx].list, list)
+		if (port->psec.port == pnum &&
+		    port->psec.protocol == protocol)
+			return port;
+
+	return NULL;
+}
+
+/**
+ * sel_netport_insert - Insert a new port into the table
+ * @port: the new port record
+ *
+ * Description:
+ * Add a new port record to the network address hash table.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int sel_netport_insert(struct sel_netport *port)
+{
+	unsigned int idx;
+
+	/* we need to impose a limit on the growth of the hash table so check
+	 * this bucket to make sure it is within the specified bounds */
+	idx = sel_netport_hashfn(port->psec.port);
+	list_add_rcu(&port->list, &sel_netport_hash[idx].list);
+	if (sel_netport_hash[idx].size == SEL_NETPORT_HASH_BKT_LIMIT) {
+		struct sel_netport *tail;
+		tail = list_entry(port->list.prev, struct sel_netport, list);
+		list_del_rcu(port->list.prev);
+		call_rcu(&tail->rcu, sel_netport_free);
+	} else
+		sel_netport_hash[idx].size++;
+
+	return 0;
+}
+
+/**
+ * sel_netport_sid_slow - Lookup the SID of a network address using the policy
+ * @protocol: protocol
+ * @pnum: port
+ * @sid: port SID
+ *
+ * Description:
+ * This function determines the SID of a network port by quering the security
+ * policy.  The result is added to the network port table to speedup future
+ * queries.  Returns zero on success, negative values on failure.
+ *
+ */
+static int sel_netport_sid_slow(u8 protocol, u16 pnum, u32 *sid)
+{
+	int ret;
+	struct sel_netport *port;
+	struct sel_netport *new = NULL;
+
+	spin_lock_bh(&sel_netport_lock);
+	port = sel_netport_find(protocol, pnum);
+	if (port != NULL) {
+		*sid = port->psec.sid;
+		ret = 0;
+		goto out;
+	}
+	new = kzalloc(sizeof(*new), GFP_ATOMIC);
+	if (new == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ret = security_port_sid(protocol, pnum, &new->psec.sid);
+	if (ret != 0)
+		goto out;
+	new->psec.port = pnum;
+	new->psec.protocol = protocol;
+	ret = sel_netport_insert(new);
+	if (ret != 0)
+		goto out;
+	*sid = new->psec.sid;
+
+out:
+	spin_unlock_bh(&sel_netport_lock);
+	if (unlikely(ret)) {
+		printk(KERN_WARNING
+		       "SELinux: failure in sel_netport_sid_slow(),"
+		       " unable to determine network port label\n");
+		kfree(new);
+	}
+	return ret;
+}
+
+/**
+ * sel_netport_sid - Lookup the SID of a network port
+ * @protocol: protocol
+ * @pnum: port
+ * @sid: port SID
+ *
+ * Description:
+ * This function determines the SID of a network port using the fastest method
+ * possible.  First the port table is queried, but if an entry can't be found
+ * then the policy is queried and the result is added to the table to speedup
+ * future queries.  Returns zero on success, negative values on failure.
+ *
+ */
+int sel_netport_sid(u8 protocol, u16 pnum, u32 *sid)
+{
+	struct sel_netport *port;
+
+	rcu_read_lock();
+	port = sel_netport_find(protocol, pnum);
+	if (port != NULL) {
+		*sid = port->psec.sid;
+		rcu_read_unlock();
+		return 0;
+	}
+	rcu_read_unlock();
+
+	return sel_netport_sid_slow(protocol, pnum, sid);
+}
+
+/**
+ * sel_netport_flush - Flush the entire network port table
+ *
+ * Description:
+ * Remove all entries from the network address table.
+ *
+ */
+static void sel_netport_flush(void)
+{
+	unsigned int idx;
+	struct sel_netport *port;
+
+	spin_lock_bh(&sel_netport_lock);
+	for (idx = 0; idx < SEL_NETPORT_HASH_SIZE; idx++) {
+		list_for_each_entry(port, &sel_netport_hash[idx].list, list) {
+			list_del_rcu(&port->list);
+			call_rcu(&port->rcu, sel_netport_free);
+		}
+		sel_netport_hash[idx].size = 0;
+	}
+	spin_unlock_bh(&sel_netport_lock);
+}
+
+static int sel_netport_avc_callback(u32 event, u32 ssid, u32 tsid,
+				    u16 class, u32 perms, u32 *retained)
+{
+	if (event == AVC_CALLBACK_RESET) {
+		sel_netport_flush();
+		synchronize_net();
+	}
+	return 0;
+}
+
+static __init int sel_netport_init(void)
+{
+	int iter;
+	int ret;
+
+	if (!selinux_enabled)
+		return 0;
+
+	for (iter = 0; iter < SEL_NETPORT_HASH_SIZE; iter++) {
+		INIT_LIST_HEAD(&sel_netport_hash[iter].list);
+		sel_netport_hash[iter].size = 0;
+	}
+
+	ret = avc_add_callback(sel_netport_avc_callback, AVC_CALLBACK_RESET,
+	                       SECSID_NULL, SECSID_NULL, SECCLASS_NULL, 0);
+	if (ret != 0)
+		panic("avc_add_callback() failed, error %d\n", ret);
+
+	return ret;
+}
+
+__initcall(sel_netport_init);
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 0341567..26fabad 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -42,7 +42,8 @@
 
 /* Policy capability filenames */
 static char *policycap_names[] = {
-	"network_peer_controls"
+	"network_peer_controls",
+	"open_perms"
 };
 
 unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE;
@@ -391,7 +392,7 @@
 
 	if (len > SIMPLE_TRANSACTION_LIMIT) {
 		printk(KERN_ERR "%s:  context size (%u) exceeds payload "
-		       "max\n", __FUNCTION__, len);
+		       "max\n", __func__, len);
 		length = -ERANGE;
 		goto out;
 	}
@@ -644,7 +645,7 @@
 
 	if (len > SIMPLE_TRANSACTION_LIMIT) {
 		printk(KERN_ERR "%s:  context size (%u) exceeds payload "
-		       "max\n", __FUNCTION__, len);
+		       "max\n", __func__, len);
 		length = -ERANGE;
 		goto out3;
 	}
@@ -821,7 +822,7 @@
 
 	if (len > SIMPLE_TRANSACTION_LIMIT) {
 		printk(KERN_ERR "%s:  context size (%u) exceeds payload "
-		       "max\n", __FUNCTION__, len);
+		       "max\n", __func__, len);
 		length = -ERANGE;
 		goto out3;
 	}
@@ -1760,7 +1761,7 @@
 out:
 	return ret;
 err:
-	printk(KERN_ERR "%s:  failed while creating inodes\n", __FUNCTION__);
+	printk(KERN_ERR "%s:  failed while creating inodes\n", __func__);
 	goto out;
 }
 
diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index cd10e27..916e73a 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -280,8 +280,8 @@
 	h->nel = 0;
 	h->nslot = nslot;
 	h->mask = mask;
-	printk(KERN_DEBUG "SELinux:%d avtab hash slots allocated. "
-	       "Num of rules:%d\n", h->nslot, nrules);
+	printk(KERN_DEBUG "SELinux: %d avtab hash slots, %d rules.\n",
+	       h->nslot, nrules);
 	return 0;
 }
 
@@ -345,18 +345,18 @@
 	if (vers < POLICYDB_VERSION_AVTAB) {
 		rc = next_entry(buf32, fp, sizeof(u32));
 		if (rc < 0) {
-			printk(KERN_ERR "security: avtab: truncated entry\n");
+			printk(KERN_ERR "SELinux: avtab: truncated entry\n");
 			return -1;
 		}
 		items2 = le32_to_cpu(buf32[0]);
 		if (items2 > ARRAY_SIZE(buf32)) {
-			printk(KERN_ERR "security: avtab: entry overflow\n");
+			printk(KERN_ERR "SELinux: avtab: entry overflow\n");
 			return -1;
 
 		}
 		rc = next_entry(buf32, fp, sizeof(u32)*items2);
 		if (rc < 0) {
-			printk(KERN_ERR "security: avtab: truncated entry\n");
+			printk(KERN_ERR "SELinux: avtab: truncated entry\n");
 			return -1;
 		}
 		items = 0;
@@ -364,19 +364,19 @@
 		val = le32_to_cpu(buf32[items++]);
 		key.source_type = (u16)val;
 		if (key.source_type != val) {
-			printk("security: avtab: truncated source type\n");
+			printk("SELinux: avtab: truncated source type\n");
 			return -1;
 		}
 		val = le32_to_cpu(buf32[items++]);
 		key.target_type = (u16)val;
 		if (key.target_type != val) {
-			printk("security: avtab: truncated target type\n");
+			printk("SELinux: avtab: truncated target type\n");
 			return -1;
 		}
 		val = le32_to_cpu(buf32[items++]);
 		key.target_class = (u16)val;
 		if (key.target_class != val) {
-			printk("security: avtab: truncated target class\n");
+			printk("SELinux: avtab: truncated target class\n");
 			return -1;
 		}
 
@@ -384,12 +384,12 @@
 		enabled = (val & AVTAB_ENABLED_OLD) ? AVTAB_ENABLED : 0;
 
 		if (!(val & (AVTAB_AV | AVTAB_TYPE))) {
-			printk("security: avtab: null entry\n");
+			printk("SELinux: avtab: null entry\n");
 			return -1;
 		}
 		if ((val & AVTAB_AV) &&
 		    (val & AVTAB_TYPE)) {
-			printk("security: avtab: entry has both access vectors and types\n");
+			printk("SELinux: avtab: entry has both access vectors and types\n");
 			return -1;
 		}
 
@@ -403,7 +403,7 @@
 		}
 
 		if (items != items2) {
-			printk("security: avtab: entry only had %d items, expected %d\n", items2, items);
+			printk("SELinux: avtab: entry only had %d items, expected %d\n", items2, items);
 			return -1;
 		}
 		return 0;
@@ -411,7 +411,7 @@
 
 	rc = next_entry(buf16, fp, sizeof(u16)*4);
 	if (rc < 0) {
-		printk("security: avtab: truncated entry\n");
+		printk("SELinux: avtab: truncated entry\n");
 		return -1;
 	}
 
@@ -424,7 +424,7 @@
 	if (!policydb_type_isvalid(pol, key.source_type) ||
 	    !policydb_type_isvalid(pol, key.target_type) ||
 	    !policydb_class_isvalid(pol, key.target_class)) {
-		printk(KERN_WARNING "security: avtab: invalid type or class\n");
+		printk(KERN_WARNING "SELinux: avtab: invalid type or class\n");
 		return -1;
 	}
 
@@ -435,19 +435,19 @@
 	}
 	if (!set || set > 1) {
 		printk(KERN_WARNING
-			"security:  avtab:  more than one specifier\n");
+			"SELinux:  avtab:  more than one specifier\n");
 		return -1;
 	}
 
 	rc = next_entry(buf32, fp, sizeof(u32));
 	if (rc < 0) {
-		printk("security: avtab: truncated entry\n");
+		printk("SELinux: avtab: truncated entry\n");
 		return -1;
 	}
 	datum.data = le32_to_cpu(*buf32);
 	if ((key.specified & AVTAB_TYPE) &&
 	    !policydb_type_isvalid(pol, datum.data)) {
-		printk(KERN_WARNING "security: avtab: invalid type\n");
+		printk(KERN_WARNING "SELinux: avtab: invalid type\n");
 		return -1;
 	}
 	return insertf(a, &key, &datum, p);
@@ -468,12 +468,12 @@
 
 	rc = next_entry(buf, fp, sizeof(u32));
 	if (rc < 0) {
-		printk(KERN_ERR "security: avtab: truncated table\n");
+		printk(KERN_ERR "SELinux: avtab: truncated table\n");
 		goto bad;
 	}
 	nel = le32_to_cpu(buf[0]);
 	if (!nel) {
-		printk(KERN_ERR "security: avtab: table is empty\n");
+		printk(KERN_ERR "SELinux: avtab: table is empty\n");
 		rc = -EINVAL;
 		goto bad;
 	}
@@ -486,9 +486,9 @@
 		rc = avtab_read_item(a, fp, pol, avtab_insertf, NULL);
 		if (rc) {
 			if (rc == -ENOMEM)
-				printk(KERN_ERR "security: avtab: out of memory\n");
+				printk(KERN_ERR "SELinux: avtab: out of memory\n");
 			else if (rc == -EEXIST)
-				printk(KERN_ERR "security: avtab: duplicate entry\n");
+				printk(KERN_ERR "SELinux: avtab: duplicate entry\n");
 			else
 				rc = -EINVAL;
 			goto bad;
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 50ad85d..a996cf1 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -96,7 +96,7 @@
 	if (new_state != node->cur_state) {
 		node->cur_state = new_state;
 		if (new_state == -1)
-			printk(KERN_ERR "security: expression result was undefined - disabling all rules.\n");
+			printk(KERN_ERR "SELinux: expression result was undefined - disabling all rules.\n");
 		/* turn the rules on or off */
 		for (cur = node->true_list; cur != NULL; cur = cur->next) {
 			if (new_state <= 0) {
@@ -276,7 +276,7 @@
 	 */
 	if (k->specified & AVTAB_TYPE) {
 		if (avtab_search(&p->te_avtab, k)) {
-			printk("security: type rule already exists outside of a conditional.");
+			printk("SELinux: type rule already exists outside of a conditional.");
 			goto err;
 		}
 		/*
@@ -291,7 +291,7 @@
 			node_ptr = avtab_search_node(&p->te_cond_avtab, k);
 			if (node_ptr) {
 				if (avtab_search_node_next(node_ptr, k->specified)) {
-					printk("security: too many conflicting type rules.");
+					printk("SELinux: too many conflicting type rules.");
 					goto err;
 				}
 				found = 0;
@@ -302,13 +302,13 @@
 					}
 				}
 				if (!found) {
-					printk("security: conflicting type rules.\n");
+					printk("SELinux: conflicting type rules.\n");
 					goto err;
 				}
 			}
 		} else {
 			if (avtab_search(&p->te_cond_avtab, k)) {
-				printk("security: conflicting type rules when adding type rule for true.\n");
+				printk("SELinux: conflicting type rules when adding type rule for true.\n");
 				goto err;
 			}
 		}
@@ -316,7 +316,7 @@
 
 	node_ptr = avtab_insert_nonunique(&p->te_cond_avtab, k, d);
 	if (!node_ptr) {
-		printk("security: could not insert rule.");
+		printk("SELinux: could not insert rule.");
 		goto err;
 	}
 
@@ -376,12 +376,12 @@
 static int expr_isvalid(struct policydb *p, struct cond_expr *expr)
 {
 	if (expr->expr_type <= 0 || expr->expr_type > COND_LAST) {
-		printk("security: conditional expressions uses unknown operator.\n");
+		printk("SELinux: conditional expressions uses unknown operator.\n");
 		return 0;
 	}
 
 	if (expr->bool > p->p_bools.nprim) {
-		printk("security: conditional expressions uses unknown bool.\n");
+		printk("SELinux: conditional expressions uses unknown bool.\n");
 		return 0;
 	}
 	return 1;
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 920b5e3..e499af4 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -364,7 +364,7 @@
 	count = le32_to_cpu(buf[2]);
 
 	if (mapunit != sizeof(u64) * 8) {
-		printk(KERN_ERR "security: ebitmap: map size %u does not "
+		printk(KERN_ERR "SELinux: ebitmap: map size %u does not "
 		       "match my size %Zd (high bit was %d)\n",
 		       mapunit, sizeof(u64) * 8, e->highbit);
 		goto bad;
@@ -382,19 +382,19 @@
 	for (i = 0; i < count; i++) {
 		rc = next_entry(&startbit, fp, sizeof(u32));
 		if (rc < 0) {
-			printk(KERN_ERR "security: ebitmap: truncated map\n");
+			printk(KERN_ERR "SELinux: ebitmap: truncated map\n");
 			goto bad;
 		}
 		startbit = le32_to_cpu(startbit);
 
 		if (startbit & (mapunit - 1)) {
-			printk(KERN_ERR "security: ebitmap start bit (%d) is "
+			printk(KERN_ERR "SELinux: ebitmap start bit (%d) is "
 			       "not a multiple of the map unit size (%u)\n",
 			       startbit, mapunit);
 			goto bad;
 		}
 		if (startbit > e->highbit - mapunit) {
-			printk(KERN_ERR "security: ebitmap start bit (%d) is "
+			printk(KERN_ERR "SELinux: ebitmap start bit (%d) is "
 			       "beyond the end of the bitmap (%u)\n",
 			       startbit, (e->highbit - mapunit));
 			goto bad;
@@ -405,7 +405,7 @@
 			tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
 			if (!tmp) {
 				printk(KERN_ERR
-				       "security: ebitmap: out of memory\n");
+				       "SELinux: ebitmap: out of memory\n");
 				rc = -ENOMEM;
 				goto bad;
 			}
@@ -418,7 +418,7 @@
 			}
 			n = tmp;
 		} else if (startbit <= n->startbit) {
-			printk(KERN_ERR "security: ebitmap: start bit %d"
+			printk(KERN_ERR "SELinux: ebitmap: start bit %d"
 			       " comes after start bit %d\n",
 			       startbit, n->startbit);
 			goto bad;
@@ -426,7 +426,7 @@
 
 		rc = next_entry(&map, fp, sizeof(u64));
 		if (rc < 0) {
-			printk(KERN_ERR "security: ebitmap: truncated map\n");
+			printk(KERN_ERR "SELinux: ebitmap: truncated map\n");
 			goto bad;
 		}
 		map = le64_to_cpu(map);
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index bd7d6a0..6bdb0ff 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -111,6 +111,11 @@
 		.version	= POLICYDB_VERSION_POLCAP,
 		.sym_num	= SYM_NUM,
 		.ocon_num	= OCON_NUM,
+	},
+	{
+		.version	= POLICYDB_VERSION_PERMISSIVE,
+		.sym_num	= SYM_NUM,
+		.ocon_num	= OCON_NUM,
 	}
 };
 
@@ -194,6 +199,7 @@
 		goto out_free_symtab;
 
 	ebitmap_init(&p->policycaps);
+	ebitmap_init(&p->permissive_map);
 
 out:
 	return rc;
@@ -401,14 +407,14 @@
 {
 	int i, rc = 0;
 
-	printk(KERN_DEBUG "security:  %d users, %d roles, %d types, %d bools",
+	printk(KERN_DEBUG "SELinux:  %d users, %d roles, %d types, %d bools",
 	       p->p_users.nprim, p->p_roles.nprim, p->p_types.nprim, p->p_bools.nprim);
 	if (selinux_mls_enabled)
 		printk(", %d sens, %d cats", p->p_levels.nprim,
 		       p->p_cats.nprim);
 	printk("\n");
 
-	printk(KERN_DEBUG "security:  %d classes, %d rules\n",
+	printk(KERN_DEBUG "SELinux:  %d classes, %d rules\n",
 	       p->p_classes.nprim, p->te_avtab.nel);
 
 #ifdef DEBUG_HASHES
@@ -687,6 +693,7 @@
 	kfree(p->type_attr_map);
 	kfree(p->undefined_perms);
 	ebitmap_destroy(&p->policycaps);
+	ebitmap_destroy(&p->permissive_map);
 
 	return;
 }
@@ -702,20 +709,20 @@
 
 	rc = sidtab_init(s);
 	if (rc) {
-		printk(KERN_ERR "security:  out of memory on SID table init\n");
+		printk(KERN_ERR "SELinux:  out of memory on SID table init\n");
 		goto out;
 	}
 
 	head = p->ocontexts[OCON_ISID];
 	for (c = head; c; c = c->next) {
 		if (!c->context[0].user) {
-			printk(KERN_ERR "security:  SID %s was never "
+			printk(KERN_ERR "SELinux:  SID %s was never "
 			       "defined.\n", c->u.name);
 			rc = -EINVAL;
 			goto out;
 		}
 		if (sidtab_insert(s, c->sid[0], &c->context[0])) {
-			printk(KERN_ERR "security:  unable to load initial "
+			printk(KERN_ERR "SELinux:  unable to load initial "
 			       "SID %s.\n", c->u.name);
 			rc = -EINVAL;
 			goto out;
@@ -809,13 +816,13 @@
 
 	items = le32_to_cpu(buf[0]);
 	if (items > ARRAY_SIZE(buf)) {
-		printk(KERN_ERR "security: mls:  range overflow\n");
+		printk(KERN_ERR "SELinux: mls:  range overflow\n");
 		rc = -EINVAL;
 		goto out;
 	}
 	rc = next_entry(buf, fp, sizeof(u32) * items);
 	if (rc < 0) {
-		printk(KERN_ERR "security: mls:  truncated range\n");
+		printk(KERN_ERR "SELinux: mls:  truncated range\n");
 		goto out;
 	}
 	r->level[0].sens = le32_to_cpu(buf[0]);
@@ -826,21 +833,21 @@
 
 	rc = ebitmap_read(&r->level[0].cat, fp);
 	if (rc) {
-		printk(KERN_ERR "security: mls:  error reading low "
+		printk(KERN_ERR "SELinux: mls:  error reading low "
 		       "categories\n");
 		goto out;
 	}
 	if (items > 1) {
 		rc = ebitmap_read(&r->level[1].cat, fp);
 		if (rc) {
-			printk(KERN_ERR "security: mls:  error reading high "
+			printk(KERN_ERR "SELinux: mls:  error reading high "
 			       "categories\n");
 			goto bad_high;
 		}
 	} else {
 		rc = ebitmap_cpy(&r->level[1].cat, &r->level[0].cat);
 		if (rc) {
-			printk(KERN_ERR "security: mls:  out of memory\n");
+			printk(KERN_ERR "SELinux: mls:  out of memory\n");
 			goto bad_high;
 		}
 	}
@@ -866,7 +873,7 @@
 
 	rc = next_entry(buf, fp, sizeof buf);
 	if (rc < 0) {
-		printk(KERN_ERR "security: context truncated\n");
+		printk(KERN_ERR "SELinux: context truncated\n");
 		goto out;
 	}
 	c->user = le32_to_cpu(buf[0]);
@@ -874,7 +881,7 @@
 	c->type = le32_to_cpu(buf[2]);
 	if (p->policyvers >= POLICYDB_VERSION_MLS) {
 		if (mls_read_range_helper(&c->range, fp)) {
-			printk(KERN_ERR "security: error reading MLS range of "
+			printk(KERN_ERR "SELinux: error reading MLS range of "
 			       "context\n");
 			rc = -EINVAL;
 			goto out;
@@ -882,7 +889,7 @@
 	}
 
 	if (!policydb_context_isvalid(p, c)) {
-		printk(KERN_ERR "security:  invalid security context\n");
+		printk(KERN_ERR "SELinux:  invalid security context\n");
 		context_destroy(c);
 		rc = -EINVAL;
 	}
@@ -1128,7 +1135,7 @@
 		cladatum->comdatum = hashtab_search(p->p_commons.table,
 						    cladatum->comkey);
 		if (!cladatum->comdatum) {
-			printk(KERN_ERR "security:  unknown common %s\n",
+			printk(KERN_ERR "SELinux:  unknown common %s\n",
 			       cladatum->comkey);
 			rc = -EINVAL;
 			goto bad;
@@ -1283,13 +1290,13 @@
 
 	rc = next_entry(buf, fp, sizeof buf);
 	if (rc < 0) {
-		printk(KERN_ERR "security: mls: truncated level\n");
+		printk(KERN_ERR "SELinux: mls: truncated level\n");
 		goto bad;
 	}
 	lp->sens = le32_to_cpu(buf[0]);
 
 	if (ebitmap_read(&lp->cat, fp)) {
-		printk(KERN_ERR "security: mls:  error reading level "
+		printk(KERN_ERR "SELinux: mls:  error reading level "
 		       "categories\n");
 		goto bad;
 	}
@@ -1491,7 +1498,7 @@
 		goto bad;
 
 	if (le32_to_cpu(buf[0]) != POLICYDB_MAGIC) {
-		printk(KERN_ERR "security:  policydb magic number 0x%x does "
+		printk(KERN_ERR "SELinux:  policydb magic number 0x%x does "
 		       "not match expected magic number 0x%x\n",
 		       le32_to_cpu(buf[0]), POLICYDB_MAGIC);
 		goto bad;
@@ -1499,27 +1506,27 @@
 
 	len = le32_to_cpu(buf[1]);
 	if (len != strlen(POLICYDB_STRING)) {
-		printk(KERN_ERR "security:  policydb string length %d does not "
+		printk(KERN_ERR "SELinux:  policydb string length %d does not "
 		       "match expected length %Zu\n",
 		       len, strlen(POLICYDB_STRING));
 		goto bad;
 	}
 	policydb_str = kmalloc(len + 1,GFP_KERNEL);
 	if (!policydb_str) {
-		printk(KERN_ERR "security:  unable to allocate memory for policydb "
+		printk(KERN_ERR "SELinux:  unable to allocate memory for policydb "
 		       "string of length %d\n", len);
 		rc = -ENOMEM;
 		goto bad;
 	}
 	rc = next_entry(policydb_str, fp, len);
 	if (rc < 0) {
-		printk(KERN_ERR "security:  truncated policydb string identifier\n");
+		printk(KERN_ERR "SELinux:  truncated policydb string identifier\n");
 		kfree(policydb_str);
 		goto bad;
 	}
 	policydb_str[len] = 0;
 	if (strcmp(policydb_str, POLICYDB_STRING)) {
-		printk(KERN_ERR "security:  policydb string %s does not match "
+		printk(KERN_ERR "SELinux:  policydb string %s does not match "
 		       "my string %s\n", policydb_str, POLICYDB_STRING);
 		kfree(policydb_str);
 		goto bad;
@@ -1536,7 +1543,7 @@
 	p->policyvers = le32_to_cpu(buf[0]);
 	if (p->policyvers < POLICYDB_VERSION_MIN ||
 	    p->policyvers > POLICYDB_VERSION_MAX) {
-	    	printk(KERN_ERR "security:  policydb version %d does not match "
+		printk(KERN_ERR "SELinux:  policydb version %d does not match "
 	    	       "my version range %d-%d\n",
 	    	       le32_to_cpu(buf[0]), POLICYDB_VERSION_MIN, POLICYDB_VERSION_MAX);
 	    	goto bad;
@@ -1570,16 +1577,20 @@
 	    ebitmap_read(&p->policycaps, fp) != 0)
 		goto bad;
 
+	if (p->policyvers >= POLICYDB_VERSION_PERMISSIVE &&
+	    ebitmap_read(&p->permissive_map, fp) != 0)
+		goto bad;
+
 	info = policydb_lookup_compat(p->policyvers);
 	if (!info) {
-		printk(KERN_ERR "security:  unable to find policy compat info "
+		printk(KERN_ERR "SELinux:  unable to find policy compat info "
 		       "for version %d\n", p->policyvers);
 		goto bad;
 	}
 
 	if (le32_to_cpu(buf[2]) != info->sym_num ||
 		le32_to_cpu(buf[3]) != info->ocon_num) {
-		printk(KERN_ERR "security:  policydb table sizes (%d,%d) do "
+		printk(KERN_ERR "SELinux:  policydb table sizes (%d,%d) do "
 		       "not match mine (%d,%d)\n", le32_to_cpu(buf[2]),
 			le32_to_cpu(buf[3]),
 		       info->sym_num, info->ocon_num);
@@ -1823,7 +1834,7 @@
 		for (genfs_p = NULL, genfs = p->genfs; genfs;
 		     genfs_p = genfs, genfs = genfs->next) {
 			if (strcmp(newgenfs->fstype, genfs->fstype) == 0) {
-				printk(KERN_ERR "security:  dup genfs "
+				printk(KERN_ERR "SELinux:  dup genfs "
 				       "fstype %s\n", newgenfs->fstype);
 				kfree(newgenfs->fstype);
 				kfree(newgenfs);
@@ -1873,7 +1884,7 @@
 				if (!strcmp(newc->u.name, c->u.name) &&
 				    (!c->v.sclass || !newc->v.sclass ||
 				     newc->v.sclass == c->v.sclass)) {
-					printk(KERN_ERR "security:  dup genfs "
+					printk(KERN_ERR "SELinux:  dup genfs "
 					       "entry (%s,%s)\n",
 					       newgenfs->fstype, c->u.name);
 					goto bad_newc;
@@ -1931,7 +1942,7 @@
 			if (rc)
 				goto bad;
 			if (!mls_range_isvalid(p, &rt->target_range)) {
-				printk(KERN_WARNING "security:  rangetrans:  invalid range\n");
+				printk(KERN_WARNING "SELinux:  rangetrans:  invalid range\n");
 				goto bad;
 			}
 			lrt = rt;
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index c4ce996..ba593a3 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -243,6 +243,8 @@
 
 	struct ebitmap policycaps;
 
+	struct ebitmap permissive_map;
+
 	unsigned int policyvers;
 
 	unsigned int reject_unknown : 1;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 3f2bad2..d750508 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -40,6 +40,7 @@
 #include <linux/sched.h>
 #include <linux/audit.h>
 #include <linux/mutex.h>
+#include <linux/selinux.h>
 #include <net/netlabel.h>
 
 #include "flask.h"
@@ -61,6 +62,7 @@
 unsigned int policydb_loaded_version;
 
 int selinux_policycap_netpeer;
+int selinux_policycap_openperm;
 
 /*
  * This is declared in avc.c
@@ -412,10 +414,35 @@
 	return 0;
 
 inval_class:
-	printk(KERN_ERR "%s:  unrecognized class %d\n", __FUNCTION__, tclass);
+	printk(KERN_ERR "%s:  unrecognized class %d\n", __func__, tclass);
 	return -EINVAL;
 }
 
+/*
+ * Given a sid find if the type has the permissive flag set
+ */
+int security_permissive_sid(u32 sid)
+{
+	struct context *context;
+	u32 type;
+	int rc;
+
+	POLICY_RDLOCK;
+
+	context = sidtab_search(&sidtab, sid);
+	BUG_ON(!context);
+
+	type = context->type;
+	/*
+	 * we are intentionally using type here, not type-1, the 0th bit may
+	 * someday indicate that we are globally setting permissive in policy.
+	 */
+	rc = ebitmap_get_bit(&policydb.permissive_map, type);
+
+	POLICY_RDUNLOCK;
+	return rc;
+}
+
 static int security_validtrans_handle_fail(struct context *ocontext,
                                            struct context *ncontext,
                                            struct context *tcontext,
@@ -1096,7 +1123,7 @@
 			continue;
 		if (i > p->p_classes.nprim) {
 			printk(KERN_INFO
-			       "security:  class %s not defined in policy\n",
+			       "SELinux:  class %s not defined in policy\n",
 			       def_class);
 			if (p->reject_unknown)
 				return -EINVAL;
@@ -1107,7 +1134,7 @@
 		pol_class = p->p_class_val_to_name[i-1];
 		if (strcmp(pol_class, def_class)) {
 			printk(KERN_ERR
-			       "security:  class %d is incorrect, found %s but should be %s\n",
+			       "SELinux:  class %d is incorrect, found %s but should be %s\n",
 			       i, pol_class, def_class);
 			return -EINVAL;
 		}
@@ -1125,7 +1152,7 @@
 		nprim = 1 << (perms->nprim - 1);
 		if (perm_val > nprim) {
 			printk(KERN_INFO
-			       "security:  permission %s in class %s not defined in policy\n",
+			       "SELinux:  permission %s in class %s not defined in policy\n",
 			       def_perm, pol_class);
 			if (p->reject_unknown)
 				return -EINVAL;
@@ -1136,14 +1163,14 @@
 		perdatum = hashtab_search(perms->table, def_perm);
 		if (perdatum == NULL) {
 			printk(KERN_ERR
-			       "security:  permission %s in class %s not found in policy, bad policy\n",
+			       "SELinux:  permission %s in class %s not found in policy, bad policy\n",
 			       def_perm, pol_class);
 			return -EINVAL;
 		}
 		pol_val = 1 << (perdatum->value - 1);
 		if (pol_val != perm_val) {
 			printk(KERN_ERR
-			       "security:  permission %s in class %s has incorrect value\n",
+			       "SELinux:  permission %s in class %s has incorrect value\n",
 			       def_perm, pol_class);
 			return -EINVAL;
 		}
@@ -1157,7 +1184,7 @@
 		BUG_ON(!cladatum);
 		if (!cladatum->comdatum) {
 			printk(KERN_ERR
-			       "security:  class %s should have an inherits clause but does not\n",
+			       "SELinux:  class %s should have an inherits clause but does not\n",
 			       pol_class);
 			return -EINVAL;
 		}
@@ -1172,7 +1199,7 @@
 			def_perm = kdefs->av_inherit[i].common_pts[j];
 			if (j >= perms->nprim) {
 				printk(KERN_INFO
-				       "security:  permission %s in class %s not defined in policy\n",
+				       "SELinux:  permission %s in class %s not defined in policy\n",
 				       def_perm, pol_class);
 				if (p->reject_unknown)
 					return -EINVAL;
@@ -1183,13 +1210,13 @@
 			perdatum = hashtab_search(perms->table, def_perm);
 			if (perdatum == NULL) {
 				printk(KERN_ERR
-				       "security:  permission %s in class %s not found in policy, bad policy\n",
+				       "SELinux:  permission %s in class %s not found in policy, bad policy\n",
 				       def_perm, pol_class);
 				return -EINVAL;
 			}
 			if (perdatum->value != j + 1) {
 				printk(KERN_ERR
-				       "security:  permission %s in class %s has incorrect value\n",
+				       "SELinux:  permission %s in class %s has incorrect value\n",
 				       def_perm, pol_class);
 				return -EINVAL;
 			}
@@ -1219,7 +1246,7 @@
 		u32 len;
 
 		context_struct_to_string(context, &s, &len);
-		printk(KERN_ERR "security:  context %s is invalid\n", s);
+		printk(KERN_ERR "SELinux:  context %s is invalid\n", s);
 		kfree(s);
 	}
 	return rc;
@@ -1299,7 +1326,7 @@
 bad:
 	context_struct_to_string(&oldc, &s, &len);
 	context_destroy(&oldc);
-	printk(KERN_ERR "security:  invalidating context %s\n", s);
+	printk(KERN_ERR "SELinux:  invalidating context %s\n", s);
 	kfree(s);
 	goto out;
 }
@@ -1308,6 +1335,8 @@
 {
 	selinux_policycap_netpeer = ebitmap_get_bit(&policydb.policycaps,
 						  POLICYDB_CAPABILITY_NETPEER);
+	selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps,
+						  POLICYDB_CAPABILITY_OPENPERM);
 }
 
 extern void selinux_complete_init(void);
@@ -1350,7 +1379,7 @@
 		/* Verify that the kernel defined classes are correct. */
 		if (validate_classes(&policydb)) {
 			printk(KERN_ERR
-			       "security:  the definition of a class is incorrect\n");
+			       "SELinux:  the definition of a class is incorrect\n");
 			LOAD_UNLOCK;
 			sidtab_destroy(&sidtab);
 			policydb_destroy(&policydb);
@@ -1384,14 +1413,14 @@
 	/* Verify that the kernel defined classes are correct. */
 	if (validate_classes(&newpolicydb)) {
 		printk(KERN_ERR
-		       "security:  the definition of a class is incorrect\n");
+		       "SELinux:  the definition of a class is incorrect\n");
 		rc = -EINVAL;
 		goto err;
 	}
 
 	rc = security_preserve_bools(&newpolicydb);
 	if (rc) {
-		printk(KERN_ERR "security:  unable to preserve booleans\n");
+		printk(KERN_ERR "SELinux:  unable to preserve booleans\n");
 		goto err;
 	}
 
@@ -1443,17 +1472,11 @@
 
 /**
  * security_port_sid - Obtain the SID for a port.
- * @domain: communication domain aka address family
- * @type: socket type
  * @protocol: protocol number
  * @port: port number
  * @out_sid: security identifier
  */
-int security_port_sid(u16 domain,
-		      u16 type,
-		      u8 protocol,
-		      u16 port,
-		      u32 *out_sid)
+int security_port_sid(u8 protocol, u16 port, u32 *out_sid)
 {
 	struct ocontext *c;
 	int rc = 0;
@@ -2203,7 +2226,7 @@
 	match = hashtab_search(policydb.p_classes.table, class);
 	if (!match) {
 		printk(KERN_ERR "%s:  unrecognized class %s\n",
-			__FUNCTION__, class);
+			__func__, class);
 		rc = -EINVAL;
 		goto out;
 	}