Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
  dm: detect lost queue
  dm: publish dm_vcalloc
  dm: publish dm_table_unplug_all
  dm: publish dm_get_mapinfo
  dm: export struct dm_dev
  dm crypt: avoid unnecessary wait when splitting bio
  dm crypt: tidy ctx pending
  dm crypt: fix async inc_pending
  dm crypt: move dec_pending on error into write_io_submit
  dm crypt: remove inc_pending from write_io_submit
  dm crypt: tidy write loop pending
  dm crypt: tidy crypt alloc
  dm crypt: tidy inc pending
  dm exception store: use chunk_t for_areas
  dm exception store: introduce area_location function
  dm raid1: kcopyd should stop on error if errors handled
  dm mpath: remove is_active from struct dm_path
  dm mpath: use more error codes

Fixed up trivial conflict in drivers/md/dm-mpath.c manually.
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 5b5aba4..7306081 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -251,8 +251,6 @@
 	- how to execute Mono-based .NET binaries with the help of BINFMT_MISC.
 moxa-smartio
 	- file with info on installing/using Moxa multiport serial driver.
-mtrr.txt
-	- how to use PPro Memory Type Range Registers to increase performance.
 mutex-design.txt
 	- info on the generic mutex subsystem.
 namespaces/
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index d8b63d1..b8e8646 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -337,7 +337,7 @@
 	int i, count = dma_map_sg(dev, sglist, nents, direction);
 	struct scatterlist *sg;
 
-	for (i = 0, sg = sglist; i < count; i++, sg++) {
+	for_each_sg(sglist, sg, count, i) {
 		hw_address[i] = sg_dma_address(sg);
 		hw_len[i] = sg_dma_len(sg);
 	}
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index b7b1482..f5696ba 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -364,6 +364,10 @@
 !Eblock/blk-barrier.c
 !Eblock/blk-tag.c
 !Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Iblock/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
   </chapter>
 
   <chapter id="chrdev">
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
index c23cab1..7257676 100644
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.txt
@@ -30,12 +30,18 @@
 Similar to read_expire mentioned above, but for writes.
 
 
-fifo_batch
+fifo_batch	(number of requests)
 ----------
 
-When a read request expires its deadline, we must move some requests from
-the sorted io scheduler list to the block device dispatch queue. fifo_batch
-controls how many requests we move.
+Requests are grouped into ``batches'' of a particular data direction (read or
+write) which are serviced in increasing sector order.  To limit extra seeking,
+deadline expiries are only checked between batches.  fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput.  When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour).  Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.
 
 
 writes_starved	(number of dispatches)
diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd
index 91c0dcc..2c558cd 100644
--- a/Documentation/cdrom/ide-cd
+++ b/Documentation/cdrom/ide-cd
@@ -145,8 +145,7 @@
 
 To play an audio CD, you should first unmount and remove any data
 CDROM.  Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).  Lacking anything else, you could use the
-cdtester program in Documentation/cdrom/sbpcd.
+workbone, cdplayer, etc.).
 
 On a few drives, you can read digital audio directly using a program
 such as cdda2wav.  The only types of drive which I've heard support
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1150444..329dcab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -463,12 +463,6 @@
 			Range: 0 - 8192
 			Default: 64
 
-	disable_8254_timer
-	enable_8254_timer
-			[IA32/X86_64] Disable/Enable interrupt 0 timer routing
-			over the 8254 in addition to over the IO-APIC. The
-			kernel tries to set a sensible default.
-
 	hpet=		[X86-32,HPET] option to control HPET usage
 			Format: { enable (default) | disable | force }
 			disable: disable HPET and use PIT instead
@@ -1882,6 +1876,12 @@
 	shapers=	[NET]
 			Maximal number of shapers.
 
+	show_msr=	[x86] show boot-time MSR settings
+			Format: { <integer> }
+			Show boot-time (BIOS-initialized) MSR settings.
+			The parameter means the number of CPUs to show,
+			for example 1 means boot CPU only.
+
 	sim710=		[SCSI,HW]
 			See header of drivers/scsi/sim710.c.
 
diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt
index 75143f0..38d324d6 100644
--- a/Documentation/scsi/scsi_fc_transport.txt
+++ b/Documentation/scsi/scsi_fc_transport.txt
@@ -436,6 +436,42 @@
     was updated to remove all vports for the fc_host as well.
 
 
+Transport supplied functions
+----------------------------
+
+The following functions are supplied by the FC-transport for use by LLDs.
+
+   fc_vport_create - create a vport
+   fc_vport_terminate - detach and remove a vport
+
+Details:
+
+/**
+ * fc_vport_create - Admin App or LLDD requests creation of a vport
+ * @shost:     scsi host the virtual port is connected to.
+ * @ids:       The world wide names, FC4 port roles, etc for
+ *              the virtual port.
+ *
+ * Notes:
+ *     This routine assumes no locks are held on entry.
+ */
+struct fc_vport *
+fc_vport_create(struct Scsi_Host *shost, struct fc_vport_identifiers *ids)
+
+/**
+ * fc_vport_terminate - Admin App or LLDD requests termination of a vport
+ * @vport:      fc_vport to be terminated
+ *
+ * Calls the LLDD vport_delete() function, then deallocates and removes
+ * the vport from the shost and object tree.
+ *
+ * Notes:
+ *      This routine assumes no locks are held on entry.
+ */
+int
+fc_vport_terminate(struct fc_vport *vport)
+
+
 Credits
 =======
 The following people have contributed to this document:
diff --git a/Documentation/x86/00-INDEX b/Documentation/x86/00-INDEX
new file mode 100644
index 0000000..dbe3377
--- /dev/null
+++ b/Documentation/x86/00-INDEX
@@ -0,0 +1,4 @@
+00-INDEX
+	- this file
+mtrr.txt
+	- how to use x86 Memory Type Range Registers to increase performance
diff --git a/Documentation/x86/i386/boot.txt b/Documentation/x86/boot.txt
similarity index 99%
rename from Documentation/x86/i386/boot.txt
rename to Documentation/x86/boot.txt
index 147bfe5..83c0033 100644
--- a/Documentation/x86/i386/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -308,7 +308,7 @@
 
 Field name:	start_sys
 Type:		read
-Offset/size:	0x20c/4
+Offset/size:	0x20c/2
 Protocol:	2.00+
 
   The load low segment (0x1000).  Obsolete.
diff --git a/Documentation/mtrr.txt b/Documentation/x86/mtrr.txt
similarity index 99%
rename from Documentation/mtrr.txt
rename to Documentation/x86/mtrr.txt
index c39ac39..cc071dc 100644
--- a/Documentation/mtrr.txt
+++ b/Documentation/x86/mtrr.txt
@@ -18,7 +18,7 @@
   The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
   MTRRs. These are supported.  The AMD Athlon family provide 8 Intel
   style MTRRs.
-  
+
   The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
   are supported.
 
@@ -87,7 +87,7 @@
 reg01: base=0xfb000000 (4016MB), size=  16MB: write-combining, count=1
 reg02: base=0xfb000000 (4016MB), size=   4kB: uncachable, count=1
 
-Some cards (especially Voodoo Graphics boards) need this 4 kB area 
+Some cards (especially Voodoo Graphics boards) need this 4 kB area
 excluded from the beginning of the region because it is used for
 registers.
 
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 17965f9..c93ff5f 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -14,6 +14,10 @@
 ones that will be supported at this time are Write-back, Uncached,
 Write-combined and Uncached Minus.
 
+
+PAT APIs
+--------
+
 There are many different APIs in the kernel that allows setting of memory
 attributes at the page level. In order to avoid aliasing, these interfaces
 should be used thoughtfully. Below is a table of interfaces available,
@@ -26,38 +30,38 @@
 API                    |    RAM   |  ACPI,...  |  Reserved/Holes  |
 -----------------------|----------|------------|------------------|
                        |          |            |                  |
-ioremap                |    --    |    UC      |       UC         |
+ioremap                |    --    |    UC-     |       UC-        |
                        |          |            |                  |
 ioremap_cache          |    --    |    WB      |       WB         |
                        |          |            |                  |
-ioremap_nocache        |    --    |    UC      |       UC         |
+ioremap_nocache        |    --    |    UC-     |       UC-        |
                        |          |            |                  |
 ioremap_wc             |    --    |    --      |       WC         |
                        |          |            |                  |
-set_memory_uc          |    UC    |    --      |       --         |
+set_memory_uc          |    UC-   |    --      |       --         |
  set_memory_wb         |          |            |                  |
                        |          |            |                  |
 set_memory_wc          |    WC    |    --      |       --         |
  set_memory_wb         |          |            |                  |
                        |          |            |                  |
-pci sysfs resource     |    --    |    --      |       UC         |
+pci sysfs resource     |    --    |    --      |       UC-        |
                        |          |            |                  |
 pci sysfs resource_wc  |    --    |    --      |       WC         |
  is IORESOURCE_PREFETCH|          |            |                  |
                        |          |            |                  |
-pci proc               |    --    |    --      |       UC         |
+pci proc               |    --    |    --      |       UC-        |
  !PCIIOC_WRITE_COMBINE |          |            |                  |
                        |          |            |                  |
 pci proc               |    --    |    --      |       WC         |
  PCIIOC_WRITE_COMBINE  |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    UC      |       UC         |
+/dev/mem               |    --    |  WB/WC/UC- |    WB/WC/UC-     |
  read-write            |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    UC      |       UC         |
+/dev/mem               |    --    |    UC-     |       UC-        |
  mmap SYNC flag        |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |  WB/WC/UC  |    WB/WC/UC      |
+/dev/mem               |    --    |  WB/WC/UC- |    WB/WC/UC-     |
  mmap !SYNC flag       |          |(from exist-|  (from exist-    |
  and                   |          |  ing alias)|    ing alias)    |
  any alias to this area|          |            |                  |
@@ -68,7 +72,7 @@
  and                   |          |            |                  |
  MTRR says WB          |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    --      |    UC_MINUS      |
+/dev/mem               |    --    |    --      |       UC-        |
  mmap !SYNC flag       |          |            |                  |
  no alias to this area |          |            |                  |
  and                   |          |            |                  |
@@ -98,3 +102,35 @@
 
 Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
 
+
+PAT debugging
+-------------
+
+With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by
+
+# mount -t debugfs debugfs /sys/kernel/debug
+# cat /sys/kernel/debug/x86/pat_memtype_list
+PAT memtype list:
+uncached-minus @ 0x7fadf000-0x7fae0000
+uncached-minus @ 0x7fb19000-0x7fb1a000
+uncached-minus @ 0x7fb1a000-0x7fb1b000
+uncached-minus @ 0x7fb1b000-0x7fb1c000
+uncached-minus @ 0x7fb1c000-0x7fb1d000
+uncached-minus @ 0x7fb1d000-0x7fb1e000
+uncached-minus @ 0x7fb1e000-0x7fb25000
+uncached-minus @ 0x7fb25000-0x7fb26000
+uncached-minus @ 0x7fb26000-0x7fb27000
+uncached-minus @ 0x7fb27000-0x7fb28000
+uncached-minus @ 0x7fb28000-0x7fb2e000
+uncached-minus @ 0x7fb2e000-0x7fb2f000
+uncached-minus @ 0x7fb2f000-0x7fb30000
+uncached-minus @ 0x7fb31000-0x7fb32000
+uncached-minus @ 0x80000000-0x90000000
+
+This list shows physical address ranges and various PAT settings used to
+access those physical address ranges.
+
+Another, more verbose way of getting PAT related debug messages is with
+"debugpat" boot parameter. With this parameter, various debug messages are
+printed to dmesg log.
+
diff --git a/Documentation/x86/i386/usb-legacy-support.txt b/Documentation/x86/usb-legacy-support.txt
similarity index 100%
rename from Documentation/x86/i386/usb-legacy-support.txt
rename to Documentation/x86/usb-legacy-support.txt
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index b0c7b6c..72ffb53 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -54,10 +54,6 @@
 		 apicmaintimer. Useful when your PIT timer is totally
 		 broken.
 
-   disable_8254_timer / enable_8254_timer
-		 Enable interrupt 0 timer routing over the 8254 in addition to over
-	         the IO-APIC. The kernel tries to set a sensible default.
-
 Early Console
 
    syntax: earlyprintk=vga
diff --git a/Documentation/x86/i386/zero-page.txt b/Documentation/x86/zero-page.txt
similarity index 100%
rename from Documentation/x86/i386/zero-page.txt
rename to Documentation/x86/zero-page.txt
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ed92864..97f0d2b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
+	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
@@ -1020,7 +1021,7 @@
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA
+	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -1036,7 +1037,7 @@
 
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
-	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
+	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
@@ -1117,10 +1118,10 @@
 	  You can safely say Y even if your machine doesn't have MTRRs, you'll
 	  just add about 9 KB to your kernel.
 
-	  See <file:Documentation/mtrr.txt> for more information.
+	  See <file:Documentation/x86/mtrr.txt> for more information.
 
 config MTRR_SANITIZER
-	bool
+	def_bool y
 	prompt "MTRR cleanup support"
 	depends on MTRR
 	help
@@ -1131,7 +1132,7 @@
 	  The largest mtrr entry size for a continous block can be set with
 	  mtrr_chunk_size.
 
-	  If unsure, say N.
+	  If unsure, say Y.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
 	int "MTRR cleanup enable value (0-1)"
@@ -1191,7 +1192,6 @@
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
-	depends on PROC_FS
 	help
 	  This kernel feature is useful for number crunching applications
 	  that may need to compute untrusted bytecode during their
@@ -1199,7 +1199,7 @@
 	  the process as file descriptors supporting the read/write
 	  syscalls, it's possible to isolate those applications in
 	  their own address space using seccomp. Once seccomp is
-	  enabled via /proc/<pid>/seccomp, it cannot be disabled
+	  enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.
 
@@ -1356,14 +1356,14 @@
 	  Don't change this unless you know what you are doing.
 
 config HOTPLUG_CPU
-	bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && HOTPLUG && !X86_VOYAGER
 	---help---
-	  Say Y here to experiment with turning CPUs off and on, and to
-	  enable suspend on SMP systems. CPUs can be controlled through
-	  /sys/devices/system/cpu.
-	  Say N if you want to disable CPU hotplug and don't need to
-	  suspend.
+	  Say Y here to allow turning CPUs off and on. CPUs can be
+	  controlled through /sys/devices/system/cpu.
+	  ( Note: power management support will enable this option
+	    automatically on SMP systems. )
+	  Say N if you want to disable CPU hotplug.
 
 config COMPAT_VDSO
 	def_bool y
@@ -1378,6 +1378,51 @@
 
 	  If unsure, say Y.
 
+config CMDLINE_BOOL
+	bool "Built-in kernel command line"
+	default n
+	help
+	  Allow for specifying boot arguments to the kernel at
+	  build time.  On some systems (e.g. embedded ones), it is
+	  necessary or convenient to provide some or all of the
+	  kernel boot arguments with the kernel itself (that is,
+	  to not rely on the boot loader to provide them.)
+
+	  To compile command line arguments into the kernel,
+	  set this option to 'Y', then fill in the
+	  the boot arguments in CONFIG_CMDLINE.
+
+	  Systems with fully functional boot loaders (i.e. non-embedded)
+	  should leave this option set to 'N'.
+
+config CMDLINE
+	string "Built-in kernel command string"
+	depends on CMDLINE_BOOL
+	default ""
+	help
+	  Enter arguments here that should be compiled into the kernel
+	  image and used at boot time.  If the boot loader provides a
+	  command line at boot time, it is appended to this string to
+	  form the full kernel command line, when the system boots.
+
+	  However, you can use the CONFIG_CMDLINE_OVERRIDE option to
+	  change this behavior.
+
+	  In most cases, the command line (whether built-in or provided
+	  by the boot loader) should specify the device for the root
+	  file system.
+
+config CMDLINE_OVERRIDE
+	bool "Built-in command line overrides boot loader arguments"
+	default n
+	depends on CMDLINE_BOOL
+	help
+	  Set this option to 'Y' to have the kernel ignore the boot loader
+	  command line, and use ONLY the built-in command line.
+
+	  This is used to work around broken boot loaders.  This should
+	  be set to 'N' under normal conditions.
+
 endmenu
 
 config ARCH_ENABLE_MEMORY_HOTPLUG
@@ -1773,7 +1818,7 @@
 
 config SYSVIPC_COMPAT
 	def_bool y
-	depends on X86_64 && COMPAT && SYSVIPC
+	depends on COMPAT && SYSVIPC
 
 endmenu
 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b225219..60a8576 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -418,3 +418,21 @@
 config X86_DEBUGCTLMSR
 	def_bool y
 	depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+
+config X86_DS
+	bool "Debug Store support"
+	default y
+	help
+	  Add support for Debug Store.
+	  This allows the kernel to provide a memory buffer to the hardware
+	  to store various profiling and tracing events.
+
+config X86_PTRACE_BTS
+	bool "ptrace interface to Branch Trace Store"
+	default y
+	depends on (X86_DS && X86_DEBUGCTLMSR)
+	help
+	  Add a ptrace interface to allow collecting an execution trace
+	  of the traced task.
+	  This collects control flow changes in a (cyclic) buffer and allows
+	  debuggers to fill in the gaps and show an execution trace of the debuggee.
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index ba7736c..29c5fbf 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -137,14 +137,15 @@
  */
 	movl output_len(%ebx), %eax
 	pushl %eax
+			# push arguments for decompress_kernel:
 	pushl %ebp	# output address
 	movl input_len(%ebx), %eax
 	pushl %eax	# input_len
 	leal input_data(%ebx), %eax
 	pushl %eax	# input_data
 	leal boot_heap(%ebx), %eax
-	pushl %eax	# heap area as third argument
-	pushl %esi	# real mode pointer as second arg
+	pushl %eax	# heap area
+	pushl %esi	# real mode pointer
 	call decompress_kernel
 	addl $20, %esp
 	popl %ecx
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9fea737..5780d36 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -16,7 +16,7 @@
  */
 #undef CONFIG_PARAVIRT
 #ifdef CONFIG_X86_32
-#define _ASM_DESC_H_ 1
+#define ASM_X86__DESC_H 1
 #endif
 
 #ifdef CONFIG_X86_64
@@ -27,7 +27,7 @@
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
@@ -251,7 +251,7 @@
 				y--;
 			}
 		} else {
-			vidmem [(x + cols * y) * 2] = c;
+			vidmem[(x + cols * y) * 2] = c;
 			if (++x >= cols) {
 				x = 0;
 				if (++y >= lines) {
@@ -277,7 +277,8 @@
 	int i;
 	char *ss = s;
 
-	for (i = 0; i < n; i++) ss[i] = c;
+	for (i = 0; i < n; i++)
+		ss[i] = c;
 	return s;
 }
 
@@ -287,7 +288,8 @@
 	const char *s = src;
 	char *d = dest;
 
-	for (i = 0; i < n; i++) d[i] = s[i];
+	for (i = 0; i < n; i++)
+		d[i] = s[i];
 	return dest;
 }
 
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index af86e43..b993062 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -30,7 +30,6 @@
 SYSSIZE		= DEF_SYSSIZE		/* system size: # of 16-byte clicks */
 					/* to be loaded */
 ROOT_DEV	= 0			/* ROOT_DEV is now written by "build" */
-SWAP_DEV	= 0			/* SWAP_DEV is now written by "build" */
 
 #ifndef SVGA_MODE
 #define SVGA_MODE ASK_VGA
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 104275e..ef9a520 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc4
-# Mon Aug 25 15:04:00 2008
+# Linux kernel version: 2.6.27-rc5
+# Wed Sep  3 17:23:09 2008
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
@@ -202,7 +202,7 @@
 # CONFIG_M586 is not set
 # CONFIG_M586TSC is not set
 # CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
+CONFIG_M686=y
 # CONFIG_MPENTIUMII is not set
 # CONFIG_MPENTIUMIII is not set
 # CONFIG_MPENTIUMM is not set
@@ -221,13 +221,14 @@
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_MVIAC7 is not set
 # CONFIG_MPSC is not set
-CONFIG_MCORE2=y
+# CONFIG_MCORE2 is not set
 # CONFIG_GENERIC_CPU is not set
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CPU=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_X86_XADD=y
+# CONFIG_X86_PPRO_FENCE is not set
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_INVLPG=y
 CONFIG_X86_BSWAP=y
@@ -235,14 +236,15 @@
 CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
 CONFIG_X86_TSC=y
+CONFIG_X86_CMOV=y
 CONFIG_X86_MINIMUM_CPU_FAMILY=4
 CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_HPET_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
 CONFIG_DMI=y
 # CONFIG_IOMMU_HELPER is not set
-CONFIG_NR_CPUS=4
-# CONFIG_SCHED_SMT is not set
+CONFIG_NR_CPUS=64
+CONFIG_SCHED_SMT=y
 CONFIG_SCHED_MC=y
 # CONFIG_PREEMPT_NONE is not set
 CONFIG_PREEMPT_VOLUNTARY=y
@@ -254,7 +256,8 @@
 # CONFIG_TOSHIBA is not set
 # CONFIG_I8K is not set
 CONFIG_X86_REBOOTFIXUPS=y
-# CONFIG_MICROCODE is not set
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 # CONFIG_NOHIGHMEM is not set
@@ -2115,7 +2118,7 @@
 CONFIG_DEFAULT_IO_DELAY_TYPE=0
 CONFIG_DEBUG_BOOT_PARAMS=y
 # CONFIG_CPA_DEBUG is not set
-# CONFIG_OPTIMIZE_INLINING is not set
+CONFIG_OPTIMIZE_INLINING=y
 
 #
 # Security options
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 678c8ac..e620ea6 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc4
-# Mon Aug 25 14:40:46 2008
+# Linux kernel version: 2.6.27-rc5
+# Wed Sep  3 17:13:39 2008
 #
 CONFIG_64BIT=y
 # CONFIG_X86_32 is not set
@@ -218,17 +218,14 @@
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_MVIAC7 is not set
 # CONFIG_MPSC is not set
-CONFIG_MCORE2=y
-# CONFIG_GENERIC_CPU is not set
+# CONFIG_MCORE2 is not set
+CONFIG_GENERIC_CPU=y
 CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=64
-CONFIG_X86_INTERNODE_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_BYTES=128
+CONFIG_X86_INTERNODE_CACHE_BYTES=128
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_X86_WP_WORKS_OK=y
-CONFIG_X86_INTEL_USERCOPY=y
-CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_P6_NOP=y
 CONFIG_X86_TSC=y
 CONFIG_X86_CMPXCHG64=y
 CONFIG_X86_CMOV=y
@@ -243,9 +240,8 @@
 CONFIG_AMD_IOMMU=y
 CONFIG_SWIOTLB=y
 CONFIG_IOMMU_HELPER=y
-# CONFIG_MAXSMP is not set
-CONFIG_NR_CPUS=4
-# CONFIG_SCHED_SMT is not set
+CONFIG_NR_CPUS=64
+CONFIG_SCHED_SMT=y
 CONFIG_SCHED_MC=y
 # CONFIG_PREEMPT_NONE is not set
 CONFIG_PREEMPT_VOLUNTARY=y
@@ -254,7 +250,8 @@
 CONFIG_X86_IO_APIC=y
 # CONFIG_X86_MCE is not set
 # CONFIG_I8K is not set
-# CONFIG_MICROCODE is not set
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_NUMA=y
@@ -290,7 +287,7 @@
 CONFIG_VIRT_TO_BUS=y
 CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
-# CONFIG_X86_PAT is not set
+CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
@@ -2089,7 +2086,7 @@
 CONFIG_DEFAULT_IO_DELAY_TYPE=0
 CONFIG_DEBUG_BOOT_PARAMS=y
 # CONFIG_CPA_DEBUG is not set
-# CONFIG_OPTIMIZE_INLINING is not set
+CONFIG_OPTIMIZE_INLINING=y
 
 #
 # Security options
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a0e1dbe..127ec3f 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -85,8 +85,10 @@
 	dump->regs.ax = regs->ax;
 	dump->regs.ds = current->thread.ds;
 	dump->regs.es = current->thread.es;
-	asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
-	asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
+	savesegment(fs, fs);
+	dump->regs.fs = fs;
+	savesegment(gs, gs);
+	dump->regs.gs = gs;
 	dump->regs.orig_ax = regs->orig_ax;
 	dump->regs.ip = regs->ip;
 	dump->regs.cs = regs->cs;
@@ -430,8 +432,9 @@
 	current->mm->start_stack =
 		(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
 	/* start thread */
-	asm volatile("movl %0,%%fs" :: "r" (0)); \
-	asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
+	loadsegment(fs, 0);
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 	load_gs_index(0);
 	(regs)->ip = ex.a_entry;
 	(regs)->sp = current->mm->start_stack;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 20af4c7..f1a2ac7 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -206,7 +206,7 @@
 	{ unsigned int cur;						\
 	  unsigned short pre;						\
 	  err |= __get_user(pre, &sc->seg);				\
-	  asm volatile("movl %%" #seg ",%0" : "=r" (cur));		\
+	  savesegment(seg, cur);					\
 	  pre |= mask;							\
 	  if (pre != cur) loadsegment(seg, pre); }
 
@@ -235,7 +235,7 @@
 	 */
 	err |= __get_user(gs, &sc->gs);
 	gs |= 3;
-	asm("movl %%gs,%0" : "=r" (oldgs));
+	savesegment(gs, oldgs);
 	if (gs != oldgs)
 		load_gs_index(gs);
 
@@ -355,14 +355,13 @@
 {
 	int tmp, err = 0;
 
-	tmp = 0;
-	__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(gs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-	__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(fs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
-	__asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(ds, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
-	__asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(es, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->es);
 
 	err |= __put_user((u32)regs->di, &sc->di);
@@ -498,8 +497,8 @@
 	regs->dx = 0;
 	regs->cx = 0;
 
-	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
-	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 
 	regs->cs = __USER32_CS;
 	regs->ss = __USER32_DS;
@@ -591,8 +590,8 @@
 	regs->dx = (unsigned long) &frame->info;
 	regs->cx = (unsigned long) &frame->uc;
 
-	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
-	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 
 	regs->cs = __USER32_CS;
 	regs->ss = __USER32_DS;
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index d3c6408..beda423 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -556,15 +556,6 @@
 	return ret;
 }
 
-/* These are here just in case some old ia32 binary calls it. */
-asmlinkage long sys32_pause(void)
-{
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	return -ERESTARTNOHAND;
-}
-
-
 #ifdef CONFIG_SYSCTL_SYSCALL
 struct sysctl_ia32 {
 	unsigned int	name;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c102af8..7d40ef7 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -58,7 +58,6 @@
 #ifdef	CONFIG_X86_64
 
 #include <asm/proto.h>
-#include <asm/genapic.h>
 
 #else				/* X86 */
 
@@ -97,8 +96,6 @@
 #warning ACPI uses CMPXCHG, i486 and later hardware
 #endif
 
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -160,6 +157,8 @@
 struct acpi_mcfg_allocation *pci_mmcfg_config;
 int pci_mmcfg_config_num;
 
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
 static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
 {
 	if (!strcmp(mcfg->header.oem_id, "SGI"))
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65a0c1b..fb04e49 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -231,25 +231,25 @@
 			continue;
 		if (*ptr > text_end)
 			continue;
-		text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
+		/* turn DS segment override prefix into lock prefix */
+		text_poke(*ptr, ((unsigned char []){0xf0}), 1);
 	};
 }
 
 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 {
 	u8 **ptr;
-	char insn[1];
 
 	if (noreplace_smp)
 		return;
 
-	add_nops(insn, 1);
 	for (ptr = start; ptr < end; ptr++) {
 		if (*ptr < text)
 			continue;
 		if (*ptr > text_end)
 			continue;
-		text_poke(*ptr, insn, 1);
+		/* turn lock prefix into DS segment override prefix */
+		text_poke(*ptr, ((unsigned char []){0x3E}), 1);
 	};
 }
 
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 44e2182..9a32b37 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -455,11 +455,11 @@
 		   force_iommu ||
 		   valid_agp ||
 		   fallback_aper_force) {
-		printk(KERN_ERR
+		printk(KERN_INFO
 			"Your BIOS doesn't leave a aperture memory hole\n");
-		printk(KERN_ERR
+		printk(KERN_INFO
 			"Please enable the IOMMU option in the BIOS setup\n");
-		printk(KERN_ERR
+		printk(KERN_INFO
 			"This costs you %d MB of RAM\n",
 				32 << fallback_aper_order);
 
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 732d1f4..5145a6e 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,7 +228,6 @@
 #include <linux/suspend.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
-#include <linux/smp_lock.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387..505543a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
 
 #define __NO_STUBS 1
 #undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #define __SYSCALL(nr, sym) [nr] = 1,
 static char syscalls[] = {
 #include <asm/unistd.h>
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index c639bd5..fdd585f 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,11 +25,11 @@
 {
 	const char *str;
 	switch (status) {
-	case  0: str = "Call completed without error"; break;
-	case -1: str = "Not implemented"; break;
-	case -2: str = "Invalid argument"; break;
-	case -3: str = "Call completed with error"; break;
-	default: str = "Unknown BIOS status code"; break;
+	case  0: str = "Call completed without error";	break;
+	case -1: str = "Not implemented";		break;
+	case -2: str = "Invalid argument";		break;
+	case -3: str = "Call completed with error";	break;
+	default: str = "Unknown BIOS status code";	break;
 	}
 	return str;
 }
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index a11f5d4..305b465 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -430,6 +430,49 @@
 }
 __setup("noclflush", setup_noclflush);
 
+struct msr_range {
+	unsigned min;
+	unsigned max;
+};
+
+static struct msr_range msr_range_array[] __cpuinitdata = {
+	{ 0x00000000, 0x00000418},
+	{ 0xc0000000, 0xc000040b},
+	{ 0xc0010000, 0xc0010142},
+	{ 0xc0011000, 0xc001103b},
+};
+
+static void __cpuinit print_cpu_msr(void)
+{
+	unsigned index;
+	u64 val;
+	int i;
+	unsigned index_min, index_max;
+
+	for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
+		index_min = msr_range_array[i].min;
+		index_max = msr_range_array[i].max;
+		for (index = index_min; index < index_max; index++) {
+			if (rdmsrl_amd_safe(index, &val))
+				continue;
+			printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+		}
+	}
+}
+
+static int show_msr __cpuinitdata;
+static __init int setup_show_msr(char *arg)
+{
+	int num;
+
+	get_option(&arg, &num);
+
+	if (num > 0)
+		show_msr = num;
+	return 1;
+}
+__setup("show_msr=", setup_show_msr);
+
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
 	if (c->x86_model_id[0])
@@ -439,6 +482,14 @@
 		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
 	else
 		printk(KERN_CONT "\n");
+
+#ifdef CONFIG_SMP
+	if (c->cpu_index < show_msr)
+		print_cpu_msr();
+#else
+	if (show_msr)
+		print_cpu_msr();
+#endif
 }
 
 static __init int setup_disablecpuid(char *arg)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b75f256..f113ef4 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -222,10 +222,11 @@
 			set_cpu_cap(c, X86_FEATURE_BTS);
 		if (!(l1 & (1<<12)))
 			set_cpu_cap(c, X86_FEATURE_PEBS);
+		ds_init_intel(c);
 	}
 
 	if (cpu_has_bts)
-		ds_init_intel(c);
+		ptrace_bts_init_intel(c);
 
 	/*
 	 * See if we have a good local APIC by checking for buggy Pentia,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index cb7d3b6..4e8d77f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -401,12 +401,7 @@
 		tmp |= ~((1<<(hi - 1)) - 1);
 
 		if (tmp != mask_lo) {
-			static int once = 1;
-
-			if (once) {
-				printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
-				once = 0;
-			}
+			WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
 			mask_lo = tmp;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 84c480b..4c42146 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -405,9 +405,9 @@
 			}
 			/* RED-PEN: base can be > 32bit */ 
 			len += seq_printf(seq, 
-				   "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
+				   "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
 			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
-			     mtrr_attrib_to_str(type), mtrr_usage_table[i]);
+			     mtrr_usage_table[i], mtrr_attrib_to_str(type));
 		}
 	}
 	return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 885c826..c78c048 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -729,7 +729,7 @@
 	mtrr_type type;
 };
 
-struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
 static int __initdata debug_print;
 
 static int __init
@@ -759,7 +759,8 @@
 	/* take out UC ranges */
 	for (i = 0; i < num_var_ranges; i++) {
 		type = range_state[i].type;
-		if (type != MTRR_TYPE_UNCACHABLE)
+		if (type != MTRR_TYPE_UNCACHABLE &&
+		    type != MTRR_TYPE_WRPROT)
 			continue;
 		size = range_state[i].size_pfn;
 		if (!size)
@@ -836,6 +837,13 @@
 }
 early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
 
+static int __init mtrr_cleanup_debug_setup(char *str)
+{
+	debug_print = 1;
+	return 0;
+}
+early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
+
 struct var_mtrr_state {
 	unsigned long	range_startk;
 	unsigned long	range_sizek;
@@ -898,6 +906,27 @@
 	}
 }
 
+static unsigned long to_size_factor(unsigned long sizek, char *factorp)
+{
+	char factor;
+	unsigned long base = sizek;
+
+	if (base & ((1<<10) - 1)) {
+		/* not MB alignment */
+		factor = 'K';
+	} else if (base & ((1<<20) - 1)){
+		factor = 'M';
+		base >>= 10;
+	} else {
+		factor = 'G';
+		base >>= 20;
+	}
+
+	*factorp = factor;
+
+	return base;
+}
+
 static unsigned int __init
 range_to_mtrr(unsigned int reg, unsigned long range_startk,
 	      unsigned long range_sizek, unsigned char type)
@@ -919,13 +948,21 @@
 			align = max_align;
 
 		sizek = 1 << align;
-		if (debug_print)
+		if (debug_print) {
+			char start_factor = 'K', size_factor = 'K';
+			unsigned long start_base, size_base;
+
+			start_base = to_size_factor(range_startk, &start_factor),
+			size_base = to_size_factor(sizek, &size_factor),
+
 			printk(KERN_DEBUG "Setting variable MTRR %d, "
-				"base: %ldMB, range: %ldMB, type %s\n",
-				reg, range_startk >> 10, sizek >> 10,
+				"base: %ld%cB, range: %ld%cB, type %s\n",
+				reg, start_base, start_factor,
+				size_base, size_factor,
 				(type == MTRR_TYPE_UNCACHABLE)?"UC":
 				    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
 				);
+		}
 		save_var_mtrr(reg++, range_startk, sizek, type);
 		range_startk += sizek;
 		range_sizek -= sizek;
@@ -970,6 +1007,8 @@
 	/* try to append some small hole */
 	range0_basek = state->range_startk;
 	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+
+	/* no increase */
 	if (range0_sizek == state->range_sizek) {
 		if (debug_print)
 			printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
@@ -980,13 +1019,40 @@
 		return 0;
 	}
 
-	range0_sizek -= chunk_sizek;
-	if (range0_sizek && sizek) {
-	    while (range0_basek + range0_sizek > (basek + sizek)) {
-		range0_sizek -= chunk_sizek;
-		if (!range0_sizek)
-			break;
-	    }
+	/* only cut back, when it is not the last */
+	if (sizek) {
+		while (range0_basek + range0_sizek > (basek + sizek)) {
+			if (range0_sizek >= chunk_sizek)
+				range0_sizek -= chunk_sizek;
+			else
+				range0_sizek = 0;
+
+			if (!range0_sizek)
+				break;
+		}
+	}
+
+second_try:
+	range_basek = range0_basek + range0_sizek;
+
+	/* one hole in the middle */
+	if (range_basek > basek && range_basek <= (basek + sizek))
+		second_sizek = range_basek - basek;
+
+	if (range0_sizek > state->range_sizek) {
+
+		/* one hole in middle or at end */
+		hole_sizek = range0_sizek - state->range_sizek - second_sizek;
+
+		/* hole size should be less than half of range0 size */
+		if (hole_sizek >= (range0_sizek >> 1) &&
+		    range0_sizek >= chunk_sizek) {
+			range0_sizek -= chunk_sizek;
+			second_sizek = 0;
+			hole_sizek = 0;
+
+			goto second_try;
+		}
 	}
 
 	if (range0_sizek) {
@@ -996,50 +1062,28 @@
 				(range0_basek + range0_sizek)<<10);
 		state->reg = range_to_mtrr(state->reg, range0_basek,
 				range0_sizek, MTRR_TYPE_WRBACK);
-
 	}
 
-	range_basek = range0_basek + range0_sizek;
-	range_sizek = chunk_sizek;
-
-	if (range_basek + range_sizek > basek &&
-	    range_basek + range_sizek <= (basek + sizek)) {
-		/* one hole */
-		second_basek = basek;
-		second_sizek = range_basek + range_sizek - basek;
-	}
-
-	/* if last piece, only could one hole near end */
-	if ((second_basek || !basek) &&
-	    range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
-	    (chunk_sizek >> 1)) {
-		/*
-		 * one hole in middle (second_sizek is 0) or at end
-		 * (second_sizek is 0 )
-		 */
-		hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
-				 - second_sizek;
-		hole_basek = range_basek + range_sizek - hole_sizek
-				 - second_sizek;
-	} else {
-		/* fallback for big hole, or several holes */
+	if (range0_sizek < state->range_sizek) {
+		/* need to handle left over */
 		range_sizek = state->range_sizek - range0_sizek;
-		second_basek = 0;
-		second_sizek = 0;
+
+		if (debug_print)
+			printk(KERN_DEBUG "range: %016lx - %016lx\n",
+				 range_basek<<10,
+				 (range_basek + range_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range_basek,
+				 range_sizek, MTRR_TYPE_WRBACK);
 	}
 
-	if (debug_print)
-		printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
-			 (range_basek + range_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
-					 MTRR_TYPE_WRBACK);
 	if (hole_sizek) {
+		hole_basek = range_basek - hole_sizek - second_sizek;
 		if (debug_print)
 			printk(KERN_DEBUG "hole: %016lx - %016lx\n",
-				 hole_basek<<10, (hole_basek + hole_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
-						 MTRR_TYPE_UNCACHABLE);
-
+				 hole_basek<<10,
+				 (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+				 hole_sizek, MTRR_TYPE_UNCACHABLE);
 	}
 
 	return second_sizek;
@@ -1154,11 +1198,11 @@
 };
 
 /*
- * gran_size: 1M, 2M, ..., 2G
- * chunk size: gran_size, ..., 4G
- * so we need (2+13)*6
+ * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
+ * chunk size: gran_size, ..., 2G
+ * so we need (1+16)*8
  */
-#define NUM_RESULT	90
+#define NUM_RESULT	136
 #define PSHIFT		(PAGE_SHIFT - 10)
 
 static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
@@ -1168,13 +1212,14 @@
 static int __init mtrr_cleanup(unsigned address_bits)
 {
 	unsigned long extra_remove_base, extra_remove_size;
-	unsigned long i, base, size, def, dummy;
+	unsigned long base, size, def, dummy;
 	mtrr_type type;
 	int nr_range, nr_range_new;
 	u64 chunk_size, gran_size;
 	unsigned long range_sums, range_sums_new;
 	int index_good;
 	int num_reg_good;
+	int i;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1204,6 +1249,8 @@
 			continue;
 		if (!size)
 			type = MTRR_NUM_TYPES;
+		if (type == MTRR_TYPE_WRPROT)
+			type = MTRR_TYPE_UNCACHABLE;
 		num[type]++;
 	}
 
@@ -1216,23 +1263,57 @@
 		num_var_ranges - num[MTRR_NUM_TYPES])
 		return 0;
 
+	/* print original var MTRRs at first, for debugging: */
+	printk(KERN_DEBUG "original variable MTRRs\n");
+	for (i = 0; i < num_var_ranges; i++) {
+		char start_factor = 'K', size_factor = 'K';
+		unsigned long start_base, size_base;
+
+		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+		if (!size_base)
+			continue;
+
+		size_base = to_size_factor(size_base, &size_factor),
+		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+		start_base = to_size_factor(start_base, &start_factor),
+		type = range_state[i].type;
+
+		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+			i, start_base, start_factor,
+			size_base, size_factor,
+			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
+			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+			);
+	}
+
 	memset(range, 0, sizeof(range));
 	extra_remove_size = 0;
-	if (mtrr_tom2) {
-		extra_remove_base = 1 << (32 - PAGE_SHIFT);
+	extra_remove_base = 1 << (32 - PAGE_SHIFT);
+	if (mtrr_tom2)
 		extra_remove_size =
 			(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
-	}
 	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
 					  extra_remove_size);
+	/*
+	 * [0, 1M) should always be coverred by var mtrr with WB
+	 * and fixed mtrrs should take effective before var mtrr for it
+	 */
+	nr_range = add_range_with_merge(range, nr_range, 0,
+					(1ULL<<(20 - PAGE_SHIFT)) - 1);
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
 	range_sums = sum_ranges(range, nr_range);
 	printk(KERN_INFO "total RAM coverred: %ldM\n",
 	       range_sums >> (20 - PAGE_SHIFT));
 
 	if (mtrr_chunk_size && mtrr_gran_size) {
 		int num_reg;
+		char gran_factor, chunk_factor, lose_factor;
+		unsigned long gran_base, chunk_base, lose_base;
 
-		debug_print = 1;
+		debug_print++;
 		/* convert ranges to var ranges state */
 		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
 					      mtrr_gran_size);
@@ -1256,34 +1337,48 @@
 			result[i].lose_cover_sizek =
 				(range_sums - range_sums_new) << PSHIFT;
 
-		printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
-			 result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
-			 result[i].chunk_sizek >> 10);
-		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+		printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+			 result[i].bad?"*BAD*":" ",
+			 gran_base, gran_factor, chunk_base, chunk_factor);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
 			 result[i].num_reg, result[i].bad?"-":"",
-			 result[i].lose_cover_sizek >> 10);
+			 lose_base, lose_factor);
 		if (!result[i].bad) {
 			set_var_mtrr_all(address_bits);
 			return 1;
 		}
 		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
 		       "will find optimal one\n");
-		debug_print = 0;
+		debug_print--;
 		memset(result, 0, sizeof(result[0]));
 	}
 
 	i = 0;
 	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
 	memset(result, 0, sizeof(result));
-	for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
-		for (chunk_size = gran_size; chunk_size < (1ULL<<33);
+	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
+		char gran_factor;
+		unsigned long gran_base;
+
+		if (debug_print)
+			gran_base = to_size_factor(gran_size >> 10, &gran_factor);
+
+		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
 		     chunk_size <<= 1) {
 			int num_reg;
 
-			if (debug_print)
-				printk(KERN_INFO
-			       "\ngran_size: %lldM   chunk_size_size: %lldM\n",
-				       gran_size >> 20, chunk_size >> 20);
+			if (debug_print) {
+				char chunk_factor;
+				unsigned long chunk_base;
+
+				chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
+				printk(KERN_INFO "\n");
+				printk(KERN_INFO "gran_size: %ld%c   chunk_size: %ld%c \n",
+				       gran_base, gran_factor, chunk_base, chunk_factor);
+			}
 			if (i >= NUM_RESULT)
 				continue;
 
@@ -1326,12 +1421,18 @@
 
 	/* print out all */
 	for (i = 0; i < NUM_RESULT; i++) {
-		printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
-		       result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
-		       result[i].chunk_sizek >> 10);
-		printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
-		       result[i].num_reg, result[i].bad?"-":"",
-		       result[i].lose_cover_sizek >> 10);
+		char gran_factor, chunk_factor, lose_factor;
+		unsigned long gran_base, chunk_base, lose_base;
+
+		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+		printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+			 result[i].bad?"*BAD*":" ",
+			 gran_base, gran_factor, chunk_base, chunk_factor);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 lose_base, lose_factor);
 	}
 
 	/* try to find the optimal index */
@@ -1339,10 +1440,8 @@
 		nr_mtrr_spare_reg = num_var_ranges - 1;
 	num_reg_good = -1;
 	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
-		if (!min_loss_pfn[i]) {
+		if (!min_loss_pfn[i])
 			num_reg_good = i;
-			break;
-		}
 	}
 
 	index_good = -1;
@@ -1358,21 +1457,26 @@
 	}
 
 	if (index_good != -1) {
+		char gran_factor, chunk_factor, lose_factor;
+		unsigned long gran_base, chunk_base, lose_base;
+
 		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
 		i = index_good;
-		printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
-				result[i].gran_sizek >> 10,
-				result[i].chunk_sizek >> 10);
-		printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
-				result[i].num_reg,
-				result[i].lose_cover_sizek >> 10);
+		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+		printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
+			 gran_base, gran_factor, chunk_base, chunk_factor);
+		printk(KERN_CONT "num_reg: %d  \tlose RAM: %ld%c\n",
+			 result[i].num_reg, lose_base, lose_factor);
 		/* convert ranges to var ranges state */
 		chunk_size = result[i].chunk_sizek;
 		chunk_size <<= 10;
 		gran_size = result[i].gran_sizek;
 		gran_size <<= 10;
-		debug_print = 1;
+		debug_print++;
 		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+		debug_print--;
 		set_var_mtrr_all(address_bits);
 		return 1;
 	}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 05cc22d..6bff382 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -295,13 +295,19 @@
 	/* setup the timer */
 	wrmsr(evntsel_msr, evntsel, 0);
 	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
+
+	/* initialize the wd struct before enabling */
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  /* unused */
+
+	/* ok, everything is initialized, announce that we're set */
+	cpu_nmi_set_wd_enabled();
+
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
 	wrmsr(evntsel_msr, evntsel, 0);
 
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
 	return 1;
 }
 
@@ -379,13 +385,19 @@
 	wrmsr(evntsel_msr, evntsel, 0);
 	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
 	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
+
+	/* initialize the wd struct before enabling */
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  /* unused */
+
+	/* ok, everything is initialized, announce that we're set */
+	cpu_nmi_set_wd_enabled();
+
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= P6_EVNTSEL0_ENABLE;
 	wrmsr(evntsel_msr, evntsel, 0);
 
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
 	return 1;
 }
 
@@ -432,6 +444,27 @@
 #define P4_CCCR_ENABLE		(1 << 12)
 #define P4_CCCR_OVF 		(1 << 31)
 
+#define P4_CONTROLS 18
+static unsigned int p4_controls[18] = {
+	MSR_P4_BPU_CCCR0,
+	MSR_P4_BPU_CCCR1,
+	MSR_P4_BPU_CCCR2,
+	MSR_P4_BPU_CCCR3,
+	MSR_P4_MS_CCCR0,
+	MSR_P4_MS_CCCR1,
+	MSR_P4_MS_CCCR2,
+	MSR_P4_MS_CCCR3,
+	MSR_P4_FLAME_CCCR0,
+	MSR_P4_FLAME_CCCR1,
+	MSR_P4_FLAME_CCCR2,
+	MSR_P4_FLAME_CCCR3,
+	MSR_P4_IQ_CCCR0,
+	MSR_P4_IQ_CCCR1,
+	MSR_P4_IQ_CCCR2,
+	MSR_P4_IQ_CCCR3,
+	MSR_P4_IQ_CCCR4,
+	MSR_P4_IQ_CCCR5,
+};
 /*
  * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
  * CRU_ESCR0 (with any non-null event selector) through a complemented
@@ -473,6 +506,26 @@
 		evntsel_msr = MSR_P4_CRU_ESCR0;
 		cccr_msr = MSR_P4_IQ_CCCR0;
 		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+
+		/*
+		 * If we're on the kdump kernel or other situation, we may
+		 * still have other performance counter registers set to
+		 * interrupt and they'll keep interrupting forever because
+		 * of the P4_CCCR_OVF quirk. So we need to ACK all the
+		 * pending interrupts and disable all the registers here,
+		 * before reenabling the NMI delivery. Refer to p4_rearm()
+		 * about the P4_CCCR_OVF quirk.
+		 */
+		if (reset_devices) {
+			unsigned int low, high;
+			int i;
+
+			for (i = 0; i < P4_CONTROLS; i++) {
+				rdmsr(p4_controls[i], low, high);
+				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
+				wrmsr(p4_controls[i], low, high);
+			}
+		}
 	} else {
 		/* logical cpu 1 */
 		perfctr_msr = MSR_P4_IQ_PERFCTR1;
@@ -499,12 +552,17 @@
 	wrmsr(evntsel_msr, evntsel, 0);
 	wrmsr(cccr_msr, cccr_val, 0);
 	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	cccr_val |= P4_CCCR_ENABLE;
-	wrmsr(cccr_msr, cccr_val, 0);
+
 	wd->perfctr_msr = perfctr_msr;
 	wd->evntsel_msr = evntsel_msr;
 	wd->cccr_msr = cccr_msr;
+
+	/* ok, everything is initialized, announce that we're set */
+	cpu_nmi_set_wd_enabled();
+
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	cccr_val |= P4_CCCR_ENABLE;
+	wrmsr(cccr_msr, cccr_val, 0);
 	return 1;
 }
 
@@ -620,13 +678,17 @@
 	wrmsr(evntsel_msr, evntsel, 0);
 	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
 	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsr(evntsel_msr, evntsel, 0);
 
 	wd->perfctr_msr = perfctr_msr;
 	wd->evntsel_msr = evntsel_msr;
 	wd->cccr_msr = 0;  /* unused */
+
+	/* ok, everything is initialized, announce that we're set */
+	cpu_nmi_set_wd_enabled();
+
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsr(evntsel_msr, evntsel, 0);
 	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
 }
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 8e9cd6a..6a44d64 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -36,7 +36,6 @@
 #include <linux/smp_lock.h>
 #include <linux/major.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 15e6c6b..e90a60e 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -7,9 +7,8 @@
 
 #include <linux/errno.h>
 #include <linux/crash_dump.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
 /**
  * copy_oldmem_page - copy one page from "oldmem"
@@ -25,7 +24,7 @@
  * in the current kernel. We stitch up a pte, similar to kmap_atomic.
  */
 ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-                               size_t csize, unsigned long offset, int userbuf)
+		size_t csize, unsigned long offset, int userbuf)
 {
 	void  *vaddr;
 
@@ -33,14 +32,16 @@
 		return 0;
 
 	vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (!vaddr)
+		return -ENOMEM;
 
 	if (userbuf) {
-		if (copy_to_user(buf, (vaddr + offset), csize)) {
+		if (copy_to_user(buf, vaddr + offset, csize)) {
 			iounmap(vaddr);
 			return -EFAULT;
 		}
 	} else
-	memcpy(buf, (vaddr + offset), csize);
+		memcpy(buf, vaddr + offset, csize);
 
 	iounmap(vaddr);
 	return csize;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8..2b69994 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -2,26 +2,49 @@
  * Debug Store support
  *
  * This provides a low-level interface to the hardware's Debug Store
- * feature that is used for last branch recording (LBR) and
+ * feature that is used for branch trace store (BTS) and
  * precise-event based sampling (PEBS).
  *
- * Different architectures use a different DS layout/pointer size.
- * The below functions therefore work on a void*.
+ * It manages:
+ * - per-thread and per-cpu allocation of BTS and PEBS
+ * - buffer memory allocation (optional)
+ * - buffer overflow handling
+ * - buffer access
+ *
+ * It assumes:
+ * - get_task_struct on all parameter tasks
+ * - current is allowed to trace parameter tasks
  *
  *
- * Since there is no user for PEBS, yet, only LBR (or branch
- * trace store, BTS) is supported.
- *
- *
- * Copyright (C) 2007 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ * Copyright (C) 2007-2008 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
  */
 
+
+#ifdef CONFIG_X86_DS
+
 #include <asm/ds.h>
 
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+
+/*
+ * The configuration for a particular DS hardware implementation.
+ */
+struct ds_configuration {
+	/* the size of the DS structure in bytes */
+	unsigned char  sizeof_ds;
+	/* the size of one pointer-typed field in the DS structure in bytes;
+	   this covers the first 8 fields related to buffer management. */
+	unsigned char  sizeof_field;
+	/* the size of a BTS/PEBS record in bytes */
+	unsigned char  sizeof_rec[2];
+};
+static struct ds_configuration ds_cfg;
 
 
 /*
@@ -44,378 +67,747 @@
  *   (interrupt occurs when write pointer passes interrupt pointer)
  * - value to which counter is reset following counter overflow
  *
- * On later architectures, the last branch recording hardware uses
- * 64bit pointers even in 32bit mode.
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
  *
  *
- * Branch Trace Store (BTS) records store information about control
- * flow changes. They at least provide the following information:
- * - source linear address
- * - destination linear address
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ * - an offset giving the start of the respective region
  *
- * Netburst supported a predicated bit that had been dropped in later
- * architectures. We do not suppor it.
+ * This offset is further used to index various arrays holding
+ * information for BTS and PEBS at the respective index.
  *
- *
- * In order to abstract from the actual DS and BTS layout, we describe
- * the access to the relevant fields.
- * Thanks to Andi Kleen for proposing this design.
- *
- * The implementation, however, is not as general as it might seem. In
- * order to stay somewhat simple and efficient, we assume an
- * underlying unsigned type (mostly a pointer type) and we expect the
- * field to be at least as big as that type.
+ * On later 32bit processors, we only access the lower 32bit of the
+ * 64bit pointer fields. The upper halves will be zeroed out.
  */
 
-/*
- * A special from_ip address to indicate that the BTS record is an
- * info record that needs to be interpreted or skipped.
- */
-#define BTS_ESCAPE_ADDRESS (-1)
-
-/*
- * A field access descriptor
- */
-struct access_desc {
-	unsigned char offset;
-	unsigned char size;
+enum ds_field {
+	ds_buffer_base = 0,
+	ds_index,
+	ds_absolute_maximum,
+	ds_interrupt_threshold,
 };
 
-/*
- * The configuration for a particular DS/BTS hardware implementation.
- */
-struct ds_configuration {
-	/* the DS configuration */
-	unsigned char  sizeof_ds;
-	struct access_desc bts_buffer_base;
-	struct access_desc bts_index;
-	struct access_desc bts_absolute_maximum;
-	struct access_desc bts_interrupt_threshold;
-	/* the BTS configuration */
-	unsigned char  sizeof_bts;
-	struct access_desc from_ip;
-	struct access_desc to_ip;
-	/* BTS variants used to store additional information like
-	   timestamps */
-	struct access_desc info_type;
-	struct access_desc info_data;
-	unsigned long debugctl_mask;
+enum ds_qualifier {
+	ds_bts  = 0,
+	ds_pebs
 };
 
-/*
- * The global configuration used by the below accessor functions
- */
-static struct ds_configuration ds_cfg;
+static inline unsigned long ds_get(const unsigned char *base,
+				   enum ds_qualifier qual, enum ds_field field)
+{
+	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	return *(unsigned long *)base;
+}
+
+static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
+			  enum ds_field field, unsigned long value)
+{
+	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	(*(unsigned long *)base) = value;
+}
+
 
 /*
- * Accessor functions for some DS and BTS fields using the above
- * global ptrace_bts_cfg.
+ * Locking is done only for allocating BTS or PEBS resources and for
+ * guarding context and buffer memory allocation.
+ *
+ * Most functions require the current task to own the ds context part
+ * they are going to access. All the locking is done when validating
+ * access to the context.
  */
-static inline unsigned long get_bts_buffer_base(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset);
-}
-static inline void set_bts_buffer_base(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value;
-}
-static inline unsigned long get_bts_index(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.bts_index.offset);
-}
-static inline void set_bts_index(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value;
-}
-static inline unsigned long get_bts_absolute_maximum(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset);
-}
-static inline void set_bts_absolute_maximum(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
-}
-static inline unsigned long get_bts_interrupt_threshold(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset);
-}
-static inline void set_bts_interrupt_threshold(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
-}
-static inline unsigned long get_from_ip(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.from_ip.offset);
-}
-static inline void set_from_ip(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value;
-}
-static inline unsigned long get_to_ip(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.to_ip.offset);
-}
-static inline void set_to_ip(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value;
-}
-static inline unsigned char get_info_type(char *base)
-{
-	return *(unsigned char *)(base + ds_cfg.info_type.offset);
-}
-static inline void set_info_type(char *base, unsigned char value)
-{
-	(*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
-}
-static inline unsigned long get_info_data(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.info_data.offset);
-}
-static inline void set_info_data(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.info_data.offset)) = value;
-}
+static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
-
-int ds_allocate(void **dsp, size_t bts_size_in_bytes)
+/*
+ * Validate that the current task is allowed to access the BTS/PEBS
+ * buffer of the parameter task.
+ *
+ * Returns 0, if access is granted; -Eerrno, otherwise.
+ */
+static inline int ds_validate_access(struct ds_context *context,
+				     enum ds_qualifier qual)
 {
-	size_t bts_size_in_records;
-	unsigned long bts;
-	void *ds;
+	if (!context)
+		return -EPERM;
 
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-		return -EOPNOTSUPP;
-
-	if (bts_size_in_bytes < 0)
-		return -EINVAL;
-
-	bts_size_in_records =
-		bts_size_in_bytes / ds_cfg.sizeof_bts;
-	bts_size_in_bytes =
-		bts_size_in_records * ds_cfg.sizeof_bts;
-
-	if (bts_size_in_bytes <= 0)
-		return -EINVAL;
-
-	bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL);
-
-	if (!bts)
-		return -ENOMEM;
-
-	ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
-
-	if (!ds) {
-		kfree((void *)bts);
-		return -ENOMEM;
-	}
-
-	set_bts_buffer_base(ds, bts);
-	set_bts_index(ds, bts);
-	set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
-	set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
-
-	*dsp = ds;
-	return 0;
-}
-
-int ds_free(void **dsp)
-{
-	if (*dsp) {
-		kfree((void *)get_bts_buffer_base(*dsp));
-		kfree(*dsp);
-		*dsp = NULL;
-	}
-	return 0;
-}
-
-int ds_get_bts_size(void *ds)
-{
-	int size_in_bytes;
-
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-		return -EOPNOTSUPP;
-
-	if (!ds)
+	if (context->owner[qual] == current)
 		return 0;
 
-	size_in_bytes =
-		get_bts_absolute_maximum(ds) -
-		get_bts_buffer_base(ds);
-	return size_in_bytes;
+	return -EPERM;
 }
 
-int ds_get_bts_end(void *ds)
+
+/*
+ * We either support (system-wide) per-cpu or per-thread allocation.
+ * We distinguish the two based on the task_struct pointer, where a
+ * NULL pointer indicates per-cpu allocation for the current cpu.
+ *
+ * Allocations are use-counted. As soon as resources are allocated,
+ * further allocations must be of the same type (per-cpu or
+ * per-thread). We model this by counting allocations (i.e. the number
+ * of tracers of a certain type) for one type negatively:
+ *   =0  no tracers
+ *   >0  number of per-thread tracers
+ *   <0  number of per-cpu tracers
+ *
+ * The below functions to get and put tracers and to check the
+ * allocation type require the ds_lock to be held by the caller.
+ *
+ * Tracers essentially gives the number of ds contexts for a certain
+ * type of allocation.
+ */
+static long tracers;
+
+static inline void get_tracer(struct task_struct *task)
 {
-	int size_in_bytes = ds_get_bts_size(ds);
-
-	if (size_in_bytes <= 0)
-		return size_in_bytes;
-
-	return size_in_bytes / ds_cfg.sizeof_bts;
+	tracers += (task ? 1 : -1);
 }
 
-int ds_get_bts_index(void *ds)
+static inline void put_tracer(struct task_struct *task)
 {
-	int index_offset_in_bytes;
-
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-		return -EOPNOTSUPP;
-
-	index_offset_in_bytes =
-		get_bts_index(ds) -
-		get_bts_buffer_base(ds);
-
-	return index_offset_in_bytes / ds_cfg.sizeof_bts;
+	tracers -= (task ? 1 : -1);
 }
 
-int ds_set_overflow(void *ds, int method)
+static inline int check_tracer(struct task_struct *task)
 {
-	switch (method) {
-	case DS_O_SIGNAL:
-		return -EOPNOTSUPP;
-	case DS_O_WRAP:
-		return 0;
-	default:
-		return -EINVAL;
-	}
+	return (task ? (tracers >= 0) : (tracers <= 0));
 }
 
-int ds_get_overflow(void *ds)
+
+/*
+ * The DS context is either attached to a thread or to a cpu:
+ * - in the former case, the thread_struct contains a pointer to the
+ *   attached context.
+ * - in the latter case, we use a static array of per-cpu context
+ *   pointers.
+ *
+ * Contexts are use-counted. They are allocated on first access and
+ * deallocated when the last user puts the context.
+ *
+ * We distinguish between an allocating and a non-allocating get of a
+ * context:
+ * - the allocating get is used for requesting BTS/PEBS resources. It
+ *   requires the caller to hold the global ds_lock.
+ * - the non-allocating get is used for all other cases. A
+ *   non-existing context indicates an error. It acquires and releases
+ *   the ds_lock itself for obtaining the context.
+ *
+ * A context and its DS configuration are allocated and deallocated
+ * together. A context always has a DS configuration of the
+ * appropriate size.
+ */
+static DEFINE_PER_CPU(struct ds_context *, system_context);
+
+#define this_system_context per_cpu(system_context, smp_processor_id())
+
+/*
+ * Returns the pointer to the parameter task's context or to the
+ * system-wide context, if task is NULL.
+ *
+ * Increases the use count of the returned context, if not NULL.
+ */
+static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
-	return DS_O_WRAP;
+	struct ds_context *context;
+
+	spin_lock(&ds_lock);
+
+	context = (task ? task->thread.ds_ctx : this_system_context);
+	if (context)
+		context->count++;
+
+	spin_unlock(&ds_lock);
+
+	return context;
 }
 
-int ds_clear(void *ds)
+/*
+ * Same as ds_get_context, but allocates the context and it's DS
+ * structure, if necessary; returns NULL; if out of memory.
+ *
+ * pre: requires ds_lock to be held
+ */
+static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 {
-	int bts_size = ds_get_bts_size(ds);
-	unsigned long bts_base;
+	struct ds_context **p_context =
+		(task ? &task->thread.ds_ctx : &this_system_context);
+	struct ds_context *context = *p_context;
 
-	if (bts_size <= 0)
-		return bts_size;
+	if (!context) {
+		context = kzalloc(sizeof(*context), GFP_KERNEL);
 
-	bts_base = get_bts_buffer_base(ds);
-	memset((void *)bts_base, 0, bts_size);
+		if (!context)
+			return NULL;
 
-	set_bts_index(ds, bts_base);
-	return 0;
-}
+		context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+		if (!context->ds) {
+			kfree(context);
+			return NULL;
+		}
 
-int ds_read_bts(void *ds, int index, struct bts_struct *out)
-{
-	void *bts;
+		*p_context = context;
 
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-		return -EOPNOTSUPP;
+		context->this = p_context;
+		context->task = task;
 
-	if (index < 0)
-		return -EINVAL;
+		if (task)
+			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-	if (index >= ds_get_bts_size(ds))
-		return -EINVAL;
+		if (!task || (task == current))
+			wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
 
-	bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts));
-
-	memset(out, 0, sizeof(*out));
-	if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
-		out->qualifier       = get_info_type(bts);
-		out->variant.jiffies = get_info_data(bts);
-	} else {
-		out->qualifier = BTS_BRANCH;
-		out->variant.lbr.from_ip = get_from_ip(bts);
-		out->variant.lbr.to_ip   = get_to_ip(bts);
+		get_tracer(task);
 	}
 
-	return sizeof(*out);;
+	context->count++;
+
+	return context;
 }
 
-int ds_write_bts(void *ds, const struct bts_struct *in)
+/*
+ * Decreases the use count of the parameter context, if not NULL.
+ * Deallocates the context, if the use count reaches zero.
+ */
+static inline void ds_put_context(struct ds_context *context)
 {
-	unsigned long bts;
+	if (!context)
+		return;
 
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+	spin_lock(&ds_lock);
+
+	if (--context->count)
+		goto out;
+
+	*(context->this) = NULL;
+
+	if (context->task)
+		clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+
+	if (!context->task || (context->task == current))
+		wrmsrl(MSR_IA32_DS_AREA, 0);
+
+	put_tracer(context->task);
+
+	/* free any leftover buffers from tracers that did not
+	 * deallocate them properly. */
+	kfree(context->buffer[ds_bts]);
+	kfree(context->buffer[ds_pebs]);
+	kfree(context->ds);
+	kfree(context);
+ out:
+	spin_unlock(&ds_lock);
+}
+
+
+/*
+ * Handle a buffer overflow
+ *
+ * task: the task whose buffers are overflowing;
+ *       NULL for a buffer overflow on the current cpu
+ * context: the ds context
+ * qual: the buffer type
+ */
+static void ds_overflow(struct task_struct *task, struct ds_context *context,
+			enum ds_qualifier qual)
+{
+	if (!context)
+		return;
+
+	if (context->callback[qual])
+		(*context->callback[qual])(task);
+
+	/* todo: do some more overflow handling */
+}
+
+
+/*
+ * Allocate a non-pageable buffer of the parameter size.
+ * Checks the memory and the locked memory rlimit.
+ *
+ * Returns the buffer, if successful;
+ *         NULL, if out of memory or rlimit exceeded.
+ *
+ * size: the requested buffer size in bytes
+ * pages (out): if not NULL, contains the number of pages reserved
+ */
+static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
+{
+	unsigned long rlim, vm, pgsz;
+	void *buffer;
+
+	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	vm   = current->mm->total_vm  + pgsz;
+	if (rlim < vm)
+		return NULL;
+
+	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	vm   = current->mm->locked_vm  + pgsz;
+	if (rlim < vm)
+		return NULL;
+
+	buffer = kzalloc(size, GFP_KERNEL);
+	if (!buffer)
+		return NULL;
+
+	current->mm->total_vm  += pgsz;
+	current->mm->locked_vm += pgsz;
+
+	if (pages)
+		*pages = pgsz;
+
+	return buffer;
+}
+
+static int ds_request(struct task_struct *task, void *base, size_t size,
+		      ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+{
+	struct ds_context *context;
+	unsigned long buffer, adj;
+	const unsigned long alignment = (1 << 3);
+	int error = 0;
+
+	if (!ds_cfg.sizeof_ds)
 		return -EOPNOTSUPP;
 
-	if (ds_get_bts_size(ds) <= 0)
-		return -ENXIO;
-
-	bts = get_bts_index(ds);
-
-	memset((void *)bts, 0, ds_cfg.sizeof_bts);
-	switch (in->qualifier) {
-	case BTS_INVALID:
-		break;
-
-	case BTS_BRANCH:
-		set_from_ip((void *)bts, in->variant.lbr.from_ip);
-		set_to_ip((void *)bts, in->variant.lbr.to_ip);
-		break;
-
-	case BTS_TASK_ARRIVES:
-	case BTS_TASK_DEPARTS:
-		set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS);
-		set_info_type((void *)bts, in->qualifier);
-		set_info_data((void *)bts, in->variant.jiffies);
-		break;
-
-	default:
+	/* we require some space to do alignment adjustments below */
+	if (size < (alignment + ds_cfg.sizeof_rec[qual]))
 		return -EINVAL;
+
+	/* buffer overflow notification is not yet implemented */
+	if (ovfl)
+		return -EOPNOTSUPP;
+
+
+	spin_lock(&ds_lock);
+
+	if (!check_tracer(task))
+		return -EPERM;
+
+	error = -ENOMEM;
+	context = ds_alloc_context(task);
+	if (!context)
+		goto out_unlock;
+
+	error = -EALREADY;
+	if (context->owner[qual] == current)
+		goto out_unlock;
+	error = -EPERM;
+	if (context->owner[qual] != NULL)
+		goto out_unlock;
+	context->owner[qual] = current;
+
+	spin_unlock(&ds_lock);
+
+
+	error = -ENOMEM;
+	if (!base) {
+		base = ds_allocate_buffer(size, &context->pages[qual]);
+		if (!base)
+			goto out_release;
+
+		context->buffer[qual]   = base;
+	}
+	error = 0;
+
+	context->callback[qual] = ovfl;
+
+	/* adjust the buffer address and size to meet alignment
+	 * constraints:
+	 * - buffer is double-word aligned
+	 * - size is multiple of record size
+	 *
+	 * We checked the size at the very beginning; we have enough
+	 * space to do the adjustment.
+	 */
+	buffer = (unsigned long)base;
+
+	adj = ALIGN(buffer, alignment) - buffer;
+	buffer += adj;
+	size   -= adj;
+
+	size /= ds_cfg.sizeof_rec[qual];
+	size *= ds_cfg.sizeof_rec[qual];
+
+	ds_set(context->ds, qual, ds_buffer_base, buffer);
+	ds_set(context->ds, qual, ds_index, buffer);
+	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+
+	if (ovfl) {
+		/* todo: select a suitable interrupt threshold */
+	} else
+		ds_set(context->ds, qual,
+		       ds_interrupt_threshold, buffer + size + 1);
+
+	/* we keep the context until ds_release */
+	return error;
+
+ out_release:
+	context->owner[qual] = NULL;
+	ds_put_context(context);
+	return error;
+
+ out_unlock:
+	spin_unlock(&ds_lock);
+	ds_put_context(context);
+	return error;
+}
+
+int ds_request_bts(struct task_struct *task, void *base, size_t size,
+		   ds_ovfl_callback_t ovfl)
+{
+	return ds_request(task, base, size, ovfl, ds_bts);
+}
+
+int ds_request_pebs(struct task_struct *task, void *base, size_t size,
+		    ds_ovfl_callback_t ovfl)
+{
+	return ds_request(task, base, size, ovfl, ds_pebs);
+}
+
+static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+{
+	struct ds_context *context;
+	int error;
+
+	context = ds_get_context(task);
+	error = ds_validate_access(context, qual);
+	if (error < 0)
+		goto out;
+
+	kfree(context->buffer[qual]);
+	context->buffer[qual] = NULL;
+
+	current->mm->total_vm  -= context->pages[qual];
+	current->mm->locked_vm -= context->pages[qual];
+	context->pages[qual] = 0;
+	context->owner[qual] = NULL;
+
+	/*
+	 * we put the context twice:
+	 *   once for the ds_get_context
+	 *   once for the corresponding ds_request
+	 */
+	ds_put_context(context);
+ out:
+	ds_put_context(context);
+	return error;
+}
+
+int ds_release_bts(struct task_struct *task)
+{
+	return ds_release(task, ds_bts);
+}
+
+int ds_release_pebs(struct task_struct *task)
+{
+	return ds_release(task, ds_pebs);
+}
+
+static int ds_get_index(struct task_struct *task, size_t *pos,
+			enum ds_qualifier qual)
+{
+	struct ds_context *context;
+	unsigned long base, index;
+	int error;
+
+	context = ds_get_context(task);
+	error = ds_validate_access(context, qual);
+	if (error < 0)
+		goto out;
+
+	base  = ds_get(context->ds, qual, ds_buffer_base);
+	index = ds_get(context->ds, qual, ds_index);
+
+	error = ((index - base) / ds_cfg.sizeof_rec[qual]);
+	if (pos)
+		*pos = error;
+ out:
+	ds_put_context(context);
+	return error;
+}
+
+int ds_get_bts_index(struct task_struct *task, size_t *pos)
+{
+	return ds_get_index(task, pos, ds_bts);
+}
+
+int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+{
+	return ds_get_index(task, pos, ds_pebs);
+}
+
+static int ds_get_end(struct task_struct *task, size_t *pos,
+		      enum ds_qualifier qual)
+{
+	struct ds_context *context;
+	unsigned long base, end;
+	int error;
+
+	context = ds_get_context(task);
+	error = ds_validate_access(context, qual);
+	if (error < 0)
+		goto out;
+
+	base = ds_get(context->ds, qual, ds_buffer_base);
+	end  = ds_get(context->ds, qual, ds_absolute_maximum);
+
+	error = ((end - base) / ds_cfg.sizeof_rec[qual]);
+	if (pos)
+		*pos = error;
+ out:
+	ds_put_context(context);
+	return error;
+}
+
+int ds_get_bts_end(struct task_struct *task, size_t *pos)
+{
+	return ds_get_end(task, pos, ds_bts);
+}
+
+int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+{
+	return ds_get_end(task, pos, ds_pebs);
+}
+
+static int ds_access(struct task_struct *task, size_t index,
+		     const void **record, enum ds_qualifier qual)
+{
+	struct ds_context *context;
+	unsigned long base, idx;
+	int error;
+
+	if (!record)
+		return -EINVAL;
+
+	context = ds_get_context(task);
+	error = ds_validate_access(context, qual);
+	if (error < 0)
+		goto out;
+
+	base = ds_get(context->ds, qual, ds_buffer_base);
+	idx = base + (index * ds_cfg.sizeof_rec[qual]);
+
+	error = -EINVAL;
+	if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
+		goto out;
+
+	*record = (const void *)idx;
+	error = ds_cfg.sizeof_rec[qual];
+ out:
+	ds_put_context(context);
+	return error;
+}
+
+int ds_access_bts(struct task_struct *task, size_t index, const void **record)
+{
+	return ds_access(task, index, record, ds_bts);
+}
+
+int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
+{
+	return ds_access(task, index, record, ds_pebs);
+}
+
+static int ds_write(struct task_struct *task, const void *record, size_t size,
+		    enum ds_qualifier qual, int force)
+{
+	struct ds_context *context;
+	int error;
+
+	if (!record)
+		return -EINVAL;
+
+	error = -EPERM;
+	context = ds_get_context(task);
+	if (!context)
+		goto out;
+
+	if (!force) {
+		error = ds_validate_access(context, qual);
+		if (error < 0)
+			goto out;
 	}
 
-	bts = bts + ds_cfg.sizeof_bts;
-	if (bts >= get_bts_absolute_maximum(ds))
-		bts = get_bts_buffer_base(ds);
-	set_bts_index(ds, bts);
+	error = 0;
+	while (size) {
+		unsigned long base, index, end, write_end, int_th;
+		unsigned long write_size, adj_write_size;
 
-	return ds_cfg.sizeof_bts;
+		/*
+		 * write as much as possible without producing an
+		 * overflow interrupt.
+		 *
+		 * interrupt_threshold must either be
+		 * - bigger than absolute_maximum or
+		 * - point to a record between buffer_base and absolute_maximum
+		 *
+		 * index points to a valid record.
+		 */
+		base   = ds_get(context->ds, qual, ds_buffer_base);
+		index  = ds_get(context->ds, qual, ds_index);
+		end    = ds_get(context->ds, qual, ds_absolute_maximum);
+		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
+
+		write_end = min(end, int_th);
+
+		/* if we are already beyond the interrupt threshold,
+		 * we fill the entire buffer */
+		if (write_end <= index)
+			write_end = end;
+
+		if (write_end <= index)
+			goto out;
+
+		write_size = min((unsigned long) size, write_end - index);
+		memcpy((void *)index, record, write_size);
+
+		record = (const char *)record + write_size;
+		size  -= write_size;
+		error += write_size;
+
+		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
+		adj_write_size *= ds_cfg.sizeof_rec[qual];
+
+		/* zero out trailing bytes */
+		memset((char *)index + write_size, 0,
+		       adj_write_size - write_size);
+		index += adj_write_size;
+
+		if (index >= end)
+			index = base;
+		ds_set(context->ds, qual, ds_index, index);
+
+		if (index >= int_th)
+			ds_overflow(task, context, qual);
+	}
+
+ out:
+	ds_put_context(context);
+	return error;
 }
 
-unsigned long ds_debugctl_mask(void)
+int ds_write_bts(struct task_struct *task, const void *record, size_t size)
 {
-	return ds_cfg.debugctl_mask;
+	return ds_write(task, record, size, ds_bts, /* force = */ 0);
 }
 
-#ifdef __i386__
-static const struct ds_configuration ds_cfg_netburst = {
-	.sizeof_ds = 9 * 4,
-	.bts_buffer_base = { 0, 4 },
-	.bts_index = { 4, 4 },
-	.bts_absolute_maximum = { 8, 4 },
-	.bts_interrupt_threshold = { 12, 4 },
-	.sizeof_bts = 3 * 4,
-	.from_ip = { 0, 4 },
-	.to_ip = { 4, 4 },
-	.info_type = { 4, 1 },
-	.info_data = { 8, 4 },
-	.debugctl_mask = (1<<2)|(1<<3)
+int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
+{
+	return ds_write(task, record, size, ds_pebs, /* force = */ 0);
+}
+
+int ds_unchecked_write_bts(struct task_struct *task,
+			   const void *record, size_t size)
+{
+	return ds_write(task, record, size, ds_bts, /* force = */ 1);
+}
+
+int ds_unchecked_write_pebs(struct task_struct *task,
+			    const void *record, size_t size)
+{
+	return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+}
+
+static int ds_reset_or_clear(struct task_struct *task,
+			     enum ds_qualifier qual, int clear)
+{
+	struct ds_context *context;
+	unsigned long base, end;
+	int error;
+
+	context = ds_get_context(task);
+	error = ds_validate_access(context, qual);
+	if (error < 0)
+		goto out;
+
+	base = ds_get(context->ds, qual, ds_buffer_base);
+	end  = ds_get(context->ds, qual, ds_absolute_maximum);
+
+	if (clear)
+		memset((void *)base, 0, end - base);
+
+	ds_set(context->ds, qual, ds_index, base);
+
+	error = 0;
+ out:
+	ds_put_context(context);
+	return error;
+}
+
+int ds_reset_bts(struct task_struct *task)
+{
+	return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+}
+
+int ds_reset_pebs(struct task_struct *task)
+{
+	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+}
+
+int ds_clear_bts(struct task_struct *task)
+{
+	return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+}
+
+int ds_clear_pebs(struct task_struct *task)
+{
+	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+}
+
+int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+{
+	struct ds_context *context;
+	int error;
+
+	if (!value)
+		return -EINVAL;
+
+	context = ds_get_context(task);
+	error = ds_validate_access(context, ds_pebs);
+	if (error < 0)
+		goto out;
+
+	*value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+
+	error = 0;
+ out:
+	ds_put_context(context);
+	return error;
+}
+
+int ds_set_pebs_reset(struct task_struct *task, u64 value)
+{
+	struct ds_context *context;
+	int error;
+
+	context = ds_get_context(task);
+	error = ds_validate_access(context, ds_pebs);
+	if (error < 0)
+		goto out;
+
+	*(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+
+	error = 0;
+ out:
+	ds_put_context(context);
+	return error;
+}
+
+static const struct ds_configuration ds_cfg_var = {
+	.sizeof_ds    = sizeof(long) * 12,
+	.sizeof_field = sizeof(long),
+	.sizeof_rec[ds_bts]   = sizeof(long) * 3,
+	.sizeof_rec[ds_pebs]  = sizeof(long) * 10
 };
-
-static const struct ds_configuration ds_cfg_pentium_m = {
-	.sizeof_ds = 9 * 4,
-	.bts_buffer_base = { 0, 4 },
-	.bts_index = { 4, 4 },
-	.bts_absolute_maximum = { 8, 4 },
-	.bts_interrupt_threshold = { 12, 4 },
-	.sizeof_bts = 3 * 4,
-	.from_ip = { 0, 4 },
-	.to_ip = { 4, 4 },
-	.info_type = { 4, 1 },
-	.info_data = { 8, 4 },
-	.debugctl_mask = (1<<6)|(1<<7)
-};
-#endif /* _i386_ */
-
-static const struct ds_configuration ds_cfg_core2 = {
-	.sizeof_ds = 9 * 8,
-	.bts_buffer_base = { 0, 8 },
-	.bts_index = { 8, 8 },
-	.bts_absolute_maximum = { 16, 8 },
-	.bts_interrupt_threshold = { 24, 8 },
-	.sizeof_bts = 3 * 8,
-	.from_ip = { 0, 8 },
-	.to_ip = { 8, 8 },
-	.info_type = { 8, 1 },
-	.info_data = { 16, 8 },
-	.debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+static const struct ds_configuration ds_cfg_64 = {
+	.sizeof_ds    = 8 * 12,
+	.sizeof_field = 8,
+	.sizeof_rec[ds_bts]   = 8 * 3,
+	.sizeof_rec[ds_pebs]  = 8 * 10
 };
 
 static inline void
@@ -429,14 +821,13 @@
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
-#ifdef __i386__
 		case 0xD:
 		case 0xE: /* Pentium M */
-			ds_configure(&ds_cfg_pentium_m);
+			ds_configure(&ds_cfg_var);
 			break;
-#endif /* _i386_ */
 		case 0xF: /* Core2 */
-			ds_configure(&ds_cfg_core2);
+		case 0x1C: /* Atom */
+			ds_configure(&ds_cfg_64);
 			break;
 		default:
 			/* sorry, don't know about them */
@@ -445,13 +836,11 @@
 		break;
 	case 0xF:
 		switch (c->x86_model) {
-#ifdef __i386__
 		case 0x0:
 		case 0x1:
 		case 0x2: /* Netburst */
-			ds_configure(&ds_cfg_netburst);
+			ds_configure(&ds_cfg_var);
 			break;
-#endif /* _i386_ */
 		default:
 			/* sorry, don't know about them */
 			break;
@@ -462,3 +851,14 @@
 		break;
 	}
 }
+
+void ds_free(struct ds_context *context)
+{
+	/* This is called when the task owning the parameter context
+	 * is dying. There should not be any user of that context left
+	 * to disturb us, anymore. */
+	unsigned long leftovers = context->count;
+	while (leftovers--)
+		ds_put_context(context);
+}
+#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 06cc8d4..945a31c 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -414,9 +414,11 @@
 	if (memmap.map == NULL)
 		printk(KERN_ERR "Could not map the EFI memory map!\n");
 	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
 	if (memmap.desc_size != sizeof(efi_memory_desc_t))
-		printk(KERN_WARNING "Kernel-defined memdesc"
-		       "doesn't match the one from EFI!\n");
+		printk(KERN_WARNING
+		  "Kernel-defined memdesc doesn't match the one from EFI!\n");
+
 	if (add_efi_memmap)
 		do_add_efi_memmap();
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 89434d4..cf3a0b2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -275,9 +275,9 @@
 ENTRY(ret_from_fork)
 	CFI_DEFAULT_STACK
 	push kernel_eflags(%rip)
-	CFI_ADJUST_CFA_OFFSET 4
+	CFI_ADJUST_CFA_OFFSET 8
 	popf				# reset kernel eflags
-	CFI_ADJUST_CFA_OFFSET -4
+	CFI_ADJUST_CFA_OFFSET -8
 	call schedule_tail
 	GET_THREAD_INFO(%rcx)
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 9bfc4d7..d16084f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -108,12 +108,11 @@
 	}
 	load_idt((const struct desc_ptr *)&idt_descr);
 
-	early_printk("Kernel alive\n");
+	if (console_loglevel == 10)
+		early_printk("Kernel alive\n");
 
 	x86_64_init_pda();
 
-	early_printk("Kernel really alive\n");
-
 	x86_64_start_reservations(real_mode_data);
 }
 
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 50e5e4a..1919143 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/thread_info.h>
 #include <linux/syscalls.h>
+#include <asm/syscalls.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
 static void set_bitmap(unsigned long *bitmap, unsigned int base,
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 3f7537b..f1c688e 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -20,6 +20,8 @@
 
 #ifdef CONFIG_X86_32
 #include <mach_apic.h>
+#include <mach_ipi.h>
+
 /*
  * the following functions deal with sending IPIs between CPUs.
  *
@@ -147,7 +149,6 @@
 }
 
 /* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
 static int convert_apicid_to_cpu(int apic_id)
 {
 	int i;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1cf8c1f..b71e02d 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -325,7 +325,7 @@
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ",
 				per_cpu(irq_stat,j).irq_call_count);
-		seq_printf(p, "  function call interrupts\n");
+		seq_printf(p, "  Function call interrupts\n");
 		seq_printf(p, "TLB: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ",
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1f78b23..f065fe9 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -129,7 +129,7 @@
 		seq_printf(p, "CAL: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-		seq_printf(p, "  function call interrupts\n");
+		seq_printf(p, "  Function call interrupts\n");
 		seq_printf(p, "TLB: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8b7a3cf..478bca9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -178,7 +178,7 @@
 	kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
 }
 
-static void kvm_release_pt(u32 pfn)
+static void kvm_release_pt(unsigned long pfn)
 {
 	struct kvm_mmu_op_release_pt rpt = {
 		.header.op = KVM_MMU_OP_RELEASE_PT,
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f..0ed5f93 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -18,6 +18,7 @@
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index abb78a2..2c97f07 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -299,6 +299,15 @@
 		on_each_cpu(__acpi_nmi_disable, NULL, 1);
 }
 
+/*
+ * This function is called as soon the LAPIC NMI watchdog driver has everything
+ * in place and it's ready to check if the NMIs belong to the NMI watchdog
+ */
+void cpu_nmi_set_wd_enabled(void)
+{
+	__get_cpu_var(wd_enabled) = 1;
+}
+
 void setup_apic_nmi_watchdog(void *unused)
 {
 	if (__get_cpu_var(wd_enabled))
@@ -311,8 +320,6 @@
 
 	switch (nmi_watchdog) {
 	case NMI_LOCAL_APIC:
-		 /* enable it before to avoid race with handler */
-		__get_cpu_var(wd_enabled) = 1;
 		if (lapic_watchdog_init(nmi_hz) < 0) {
 			__get_cpu_var(wd_enabled) = 0;
 			return;
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 3e66722..7a13fac 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -190,12 +190,12 @@
 static void __init platform_detect(void)
 {
 	size_t propsize;
-	u32 rev;
+	__be32 rev;
 
 	if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4,
 			&propsize) || propsize != 4) {
 		printk(KERN_ERR "ofw: getprop call failed!\n");
-		rev = 0;
+		rev = cpu_to_be32(0);
 	}
 	olpc_platform_info.boardrev = be32_to_cpu(rev);
 }
@@ -203,7 +203,7 @@
 static void __init platform_detect(void)
 {
 	/* stopgap until OFW support is added to the kernel */
-	olpc_platform_info.boardrev = be32_to_cpu(0xc2);
+	olpc_platform_info.boardrev = 0xc2;
 }
 #endif
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 300da17..e2f4376 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -330,6 +330,7 @@
 #endif
 	.wbinvd = native_wbinvd,
 	.read_msr = native_read_msr_safe,
+	.read_msr_amd = native_read_msr_amd_safe,
 	.write_msr = native_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 5826221..9fe644f 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -23,7 +23,7 @@
 			start = start_##ops##_##x;		\
 			end = end_##ops##_##x;			\
 			goto patch_site
-	switch(type) {
+	switch (type) {
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, restore_fl);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 87d4d69..f704cb5 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -82,7 +82,7 @@
 	 * using 512M as goal
 	 */
 	align = 64ULL<<20;
-	size = round_up(dma32_bootmem_size, align);
+	size = roundup(dma32_bootmem_size, align);
 	dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
 				 512ULL<<20);
 	if (dma32_bootmem_ptr)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index be33a54..1a895a5 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -82,7 +82,8 @@
 static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
 static int need_flush;		/* global flush state. set for each gart wrap */
 
-static unsigned long alloc_iommu(struct device *dev, int size)
+static unsigned long alloc_iommu(struct device *dev, int size,
+				 unsigned long align_mask)
 {
 	unsigned long offset, flags;
 	unsigned long boundary_size;
@@ -90,16 +91,17 @@
 
 	base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
 			   PAGE_SIZE) >> PAGE_SHIFT;
-	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+	boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
 			      PAGE_SIZE) >> PAGE_SHIFT;
 
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
-				  size, base_index, boundary_size, 0);
+				  size, base_index, boundary_size, align_mask);
 	if (offset == -1) {
 		need_flush = 1;
 		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
-					  size, base_index, boundary_size, 0);
+					  size, base_index, boundary_size,
+					  align_mask);
 	}
 	if (offset != -1) {
 		next_bit = offset+size;
@@ -236,10 +238,10 @@
  * Caller needs to check if the iommu is needed and flush.
  */
 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
-				size_t size, int dir)
+				size_t size, int dir, unsigned long align_mask)
 {
 	unsigned long npages = iommu_num_pages(phys_mem, size);
-	unsigned long iommu_page = alloc_iommu(dev, npages);
+	unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
 	int i;
 
 	if (iommu_page == -1) {
@@ -262,7 +264,11 @@
 static dma_addr_t
 gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
 {
-	dma_addr_t map = dma_map_area(dev, paddr, size, dir);
+	dma_addr_t map;
+	unsigned long align_mask;
+
+	align_mask = (1UL << get_order(size)) - 1;
+	map = dma_map_area(dev, paddr, size, dir, align_mask);
 
 	flush_gart();
 
@@ -281,7 +287,8 @@
 	if (!need_iommu(dev, paddr, size))
 		return paddr;
 
-	bus = gart_map_simple(dev, paddr, size, dir);
+	bus = dma_map_area(dev, paddr, size, dir, 0);
+	flush_gart();
 
 	return bus;
 }
@@ -340,7 +347,7 @@
 		unsigned long addr = sg_phys(s);
 
 		if (nonforced_iommu(dev, addr, s->length)) {
-			addr = dma_map_area(dev, addr, s->length, dir);
+			addr = dma_map_area(dev, addr, s->length, dir, 0);
 			if (addr == bad_dma_address) {
 				if (i > 0)
 					gart_unmap_sg(dev, sg, i, dir);
@@ -362,7 +369,7 @@
 			  int nelems, struct scatterlist *sout,
 			  unsigned long pages)
 {
-	unsigned long iommu_start = alloc_iommu(dev, pages);
+	unsigned long iommu_start = alloc_iommu(dev, pages, 0);
 	unsigned long iommu_page = iommu_start;
 	struct scatterlist *s;
 	int i;
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c
index bc1f2d3..a311ffc 100644
--- a/arch/x86/kernel/pcspeaker.c
+++ b/arch/x86/kernel/pcspeaker.c
@@ -1,20 +1,13 @@
 #include <linux/platform_device.h>
-#include <linux/errno.h>
+#include <linux/err.h>
 #include <linux/init.h>
 
 static __init int add_pcspkr(void)
 {
 	struct platform_device *pd;
-	int ret;
 
-	pd = platform_device_alloc("pcspkr", -1);
-	if (!pd)
-		return -ENOMEM;
+	pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
 
-	ret = platform_device_add(pd);
-	if (ret)
-		platform_device_put(pd);
-
-	return ret;
+	return IS_ERR(pd) ? PTR_ERR(pd) : 0;
 }
 device_initcall(add_pcspkr);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 876e918..ec7a2ba 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -185,7 +185,8 @@
 static void poll_idle(void)
 {
 	local_irq_enable();
-	cpu_relax();
+	while (!need_resched())
+		cpu_relax();
 }
 
 /*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 31f40b2..205188d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -37,6 +37,7 @@
 #include <linux/tick.h>
 #include <linux/percpu.h>
 #include <linux/prctl.h>
+#include <linux/dmi.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -56,6 +57,8 @@
 #include <asm/cpu.h>
 #include <asm/kdebug.h>
 #include <asm/idle.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -161,6 +164,7 @@
 	unsigned long d0, d1, d2, d3, d6, d7;
 	unsigned long sp;
 	unsigned short ss, gs;
+	const char *board;
 
 	if (user_mode_vm(regs)) {
 		sp = regs->sp;
@@ -173,11 +177,15 @@
 	}
 
 	printk("\n");
-	printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+
+	board = dmi_get_system_info(DMI_PRODUCT_NAME);
+	if (!board)
+		board = "";
+	printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
 			task_pid_nr(current), current->comm,
 			print_tainted(), init_utsname()->release,
 			(int)strcspn(init_utsname()->version, " "),
-			init_utsname()->version);
+			init_utsname()->version, board);
 
 	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
 			(u16)regs->cs, regs->ip, regs->flags,
@@ -277,6 +285,14 @@
 		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
 		put_cpu();
 	}
+#ifdef CONFIG_X86_DS
+	/* Free any DS contexts that have not been properly released. */
+	if (unlikely(current->thread.ds_ctx)) {
+		/* we clear debugctl to make sure DS is not used. */
+		update_debugctlmsr(0);
+		ds_free(current->thread.ds_ctx);
+	}
+#endif /* CONFIG_X86_DS */
 }
 
 void flush_thread(void)
@@ -438,6 +454,35 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_DS
+static int update_debugctl(struct thread_struct *prev,
+			struct thread_struct *next, unsigned long debugctl)
+{
+	unsigned long ds_prev = 0;
+	unsigned long ds_next = 0;
+
+	if (prev->ds_ctx)
+		ds_prev = (unsigned long)prev->ds_ctx->ds;
+	if (next->ds_ctx)
+		ds_next = (unsigned long)next->ds_ctx->ds;
+
+	if (ds_next != ds_prev) {
+		/* we clear debugctl to make sure DS
+		 * is not in use when we change it */
+		debugctl = 0;
+		update_debugctlmsr(0);
+		wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
+	}
+	return debugctl;
+}
+#else
+static int update_debugctl(struct thread_struct *prev,
+			struct thread_struct *next, unsigned long debugctl)
+{
+	return debugctl;
+}
+#endif /* CONFIG_X86_DS */
+
 static noinline void
 __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		 struct tss_struct *tss)
@@ -448,14 +493,7 @@
 	prev = &prev_p->thread;
 	next = &next_p->thread;
 
-	debugctl = prev->debugctlmsr;
-	if (next->ds_area_msr != prev->ds_area_msr) {
-		/* we clear debugctl to make sure DS
-		 * is not in use when we change it */
-		debugctl = 0;
-		update_debugctlmsr(0);
-		wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
-	}
+	debugctl = update_debugctl(prev, next, prev->debugctlmsr);
 
 	if (next->debugctlmsr != debugctl)
 		update_debugctlmsr(next->debugctlmsr);
@@ -479,13 +517,13 @@
 			hard_enable_TSC();
 	}
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
 	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 
 	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 
 	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e12e0e4..2a8ccb9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,11 +37,11 @@
 #include <linux/kdebug.h>
 #include <linux/tick.h>
 #include <linux/prctl.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/mmu_context.h>
@@ -51,6 +51,7 @@
 #include <asm/proto.h>
 #include <asm/ia32.h>
 #include <asm/idle.h>
+#include <asm/syscalls.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -88,7 +89,7 @@
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
-#include <asm/nmi.h>
+#include <linux/nmi.h>
 /* We halt the CPU with physical CPU hotplug */
 static inline void play_dead(void)
 {
@@ -153,7 +154,7 @@
 }
 
 /* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs *regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 	unsigned long d0, d1, d2, d3, d6, d7;
@@ -162,59 +163,61 @@
 
 	printk("\n");
 	print_modules();
-	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 	printk_address(regs->ip, 1);
-	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
-		regs->flags);
-	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
+			regs->sp, regs->flags);
+	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->ax, regs->bx, regs->cx);
-	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
 	       regs->dx, regs->si, regs->di);
-	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
 	       regs->bp, regs->r8, regs->r9);
-	printk("R10: %016lx R11: %016lx R12: %016lx\n",
-	       regs->r10, regs->r11, regs->r12); 
-	printk("R13: %016lx R14: %016lx R15: %016lx\n",
-	       regs->r13, regs->r14, regs->r15); 
+	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+	       regs->r10, regs->r11, regs->r12);
+	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+	       regs->r13, regs->r14, regs->r15);
 
-	asm("movl %%ds,%0" : "=r" (ds)); 
-	asm("movl %%cs,%0" : "=r" (cs)); 
-	asm("movl %%es,%0" : "=r" (es)); 
+	asm("movl %%ds,%0" : "=r" (ds));
+	asm("movl %%cs,%0" : "=r" (cs));
+	asm("movl %%es,%0" : "=r" (es));
 	asm("movl %%fs,%0" : "=r" (fsindex));
 	asm("movl %%gs,%0" : "=r" (gsindex));
 
 	rdmsrl(MSR_FS_BASE, fs);
-	rdmsrl(MSR_GS_BASE, gs); 
-	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
+	rdmsrl(MSR_GS_BASE, gs);
+	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
 	cr4 = read_cr4();
 
-	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
-	       fs,fsindex,gs,gsindex,shadowgs); 
-	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
-	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+	       fs, fsindex, gs, gsindex, shadowgs);
+	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+			es, cr0);
+	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+			cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
 	get_debugreg(d2, 2);
-	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 	get_debugreg(d3, 3);
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
-	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 }
 
 void show_regs(struct pt_regs *regs)
 {
-	printk("CPU %d:", smp_processor_id());
+	printk(KERN_INFO "CPU %d:", smp_processor_id());
 	__show_regs(regs);
 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 }
@@ -240,6 +243,14 @@
 		t->io_bitmap_max = 0;
 		put_cpu();
 	}
+#ifdef CONFIG_X86_DS
+	/* Free any DS contexts that have not been properly released. */
+	if (unlikely(t->ds_ctx)) {
+		/* we clear debugctl to make sure DS is not used. */
+		update_debugctlmsr(0);
+		ds_free(t->ds_ctx);
+	}
+#endif /* CONFIG_X86_DS */
 }
 
 void flush_thread(void)
@@ -315,10 +326,10 @@
 
 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		unsigned long unused,
-	struct task_struct * p, struct pt_regs * regs)
+	struct task_struct *p, struct pt_regs *regs)
 {
 	int err;
-	struct pt_regs * childregs;
+	struct pt_regs *childregs;
 	struct task_struct *me = current;
 
 	childregs = ((struct pt_regs *)
@@ -363,10 +374,10 @@
 		if (test_thread_flag(TIF_IA32))
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)childregs->si, 0);
-		else 			
-#endif	 
-			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
-		if (err) 
+		else
+#endif
+			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
+		if (err)
 			goto out;
 	}
 	err = 0;
@@ -473,13 +484,27 @@
 	next = &next_p->thread;
 
 	debugctl = prev->debugctlmsr;
-	if (next->ds_area_msr != prev->ds_area_msr) {
-		/* we clear debugctl to make sure DS
-		 * is not in use when we change it */
-		debugctl = 0;
-		update_debugctlmsr(0);
-		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+
+#ifdef CONFIG_X86_DS
+	{
+		unsigned long ds_prev = 0, ds_next = 0;
+
+		if (prev->ds_ctx)
+			ds_prev = (unsigned long)prev->ds_ctx->ds;
+		if (next->ds_ctx)
+			ds_next = (unsigned long)next->ds_ctx->ds;
+
+		if (ds_next != ds_prev) {
+			/*
+			 * We clear debugctl to make sure DS
+			 * is not in use when we change it:
+			 */
+			debugctl = 0;
+			update_debugctlmsr(0);
+			wrmsrl(MSR_IA32_DS_AREA, ds_next);
+		}
 	}
+#endif /* CONFIG_X86_DS */
 
 	if (next->debugctlmsr != debugctl)
 		update_debugctlmsr(next->debugctlmsr);
@@ -517,13 +542,13 @@
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
 	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 
 	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 }
 
 /*
@@ -545,7 +570,7 @@
 	unsigned fsindex, gsindex;
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter>5)
+	if (next_p->fpu_counter > 5)
 		prefetch(next->xstate);
 
 	/*
@@ -553,13 +578,13 @@
 	 */
 	load_sp0(tss, next);
 
-	/* 
+	/*
 	 * Switch DS and ES.
 	 * This won't pick up thread selector changes, but I guess that is ok.
 	 */
 	savesegment(es, prev->es);
 	if (unlikely(next->es | prev->es))
-		loadsegment(es, next->es); 
+		loadsegment(es, next->es);
 
 	savesegment(ds, prev->ds);
 	if (unlikely(next->ds | prev->ds))
@@ -585,7 +610,7 @@
 	 */
 	arch_leave_lazy_cpu_mode();
 
-	/* 
+	/*
 	 * Switch FS and GS.
 	 *
 	 * Segment register != 0 always requires a reload.  Also
@@ -594,13 +619,13 @@
 	 */
 	if (unlikely(fsindex | next->fsindex | prev->fs)) {
 		loadsegment(fs, next->fsindex);
-		/* 
+		/*
 		 * Check if the user used a selector != 0; if yes
 		 *  clear 64bit base, since overloaded base is always
 		 *  mapped to the Null selector
 		 */
 		if (fsindex)
-			prev->fs = 0;				
+			prev->fs = 0;
 	}
 	/* when next process has a 64bit base use it */
 	if (next->fs)
@@ -610,7 +635,7 @@
 	if (unlikely(gsindex | next->gsindex | prev->gs)) {
 		load_gs_index(next->gsindex);
 		if (gsindex)
-			prev->gs = 0;				
+			prev->gs = 0;
 	}
 	if (next->gs)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
@@ -619,12 +644,12 @@
 	/* Must be after DS reload */
 	unlazy_fpu(prev_p);
 
-	/* 
+	/*
 	 * Switch the PDA and FPU contexts.
 	 */
 	prev->usersp = read_pda(oldrsp);
 	write_pda(oldrsp, next->usersp);
-	write_pda(pcurrent, next_p); 
+	write_pda(pcurrent, next_p);
 
 	write_pda(kernelstack,
 		  (unsigned long)task_stack_page(next_p) +
@@ -665,7 +690,7 @@
 		char __user * __user *envp, struct pt_regs *regs)
 {
 	long error;
-	char * filename;
+	char *filename;
 
 	filename = getname(name);
 	error = PTR_ERR(filename);
@@ -723,55 +748,55 @@
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long stack;
-	u64 fp,ip;
+	u64 fp, ip;
 	int count = 0;
 
-	if (!p || p == current || p->state==TASK_RUNNING)
-		return 0; 
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
 	stack = (unsigned long)task_stack_page(p);
 	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
 		return 0;
 	fp = *(u64 *)(p->thread.sp);
-	do { 
+	do {
 		if (fp < (unsigned long)stack ||
 		    fp > (unsigned long)stack+THREAD_SIZE)
-			return 0; 
+			return 0;
 		ip = *(u64 *)(fp+8);
 		if (!in_sched_functions(ip))
 			return ip;
-		fp = *(u64 *)fp; 
-	} while (count++ < 16); 
+		fp = *(u64 *)fp;
+	} while (count++ < 16);
 	return 0;
 }
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{ 
-	int ret = 0; 
+{
+	int ret = 0;
 	int doit = task == current;
 	int cpu;
 
-	switch (code) { 
+	switch (code) {
 	case ARCH_SET_GS:
 		if (addr >= TASK_SIZE_OF(task))
-			return -EPERM; 
+			return -EPERM;
 		cpu = get_cpu();
-		/* handle small bases via the GDT because that's faster to 
+		/* handle small bases via the GDT because that's faster to
 		   switch. */
-		if (addr <= 0xffffffff) {  
-			set_32bit_tls(task, GS_TLS, addr); 
-			if (doit) { 
+		if (addr <= 0xffffffff) {
+			set_32bit_tls(task, GS_TLS, addr);
+			if (doit) {
 				load_TLS(&task->thread, cpu);
-				load_gs_index(GS_TLS_SEL); 
+				load_gs_index(GS_TLS_SEL);
 			}
-			task->thread.gsindex = GS_TLS_SEL; 
+			task->thread.gsindex = GS_TLS_SEL;
 			task->thread.gs = 0;
-		} else { 
+		} else {
 			task->thread.gsindex = 0;
 			task->thread.gs = addr;
 			if (doit) {
 				load_gs_index(0);
 				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
-			} 
+			}
 		}
 		put_cpu();
 		break;
@@ -825,8 +850,7 @@
 				rdmsrl(MSR_KERNEL_GS_BASE, base);
 			else
 				base = task->thread.gs;
-		}
-		else
+		} else
 			base = task->thread.gs;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e37dccc..e375b65 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
+#include <linux/tracehook.h>
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/security.h>
@@ -69,7 +70,7 @@
 
 #define FLAG_MASK		FLAG_MASK_32
 
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
 	regno >>= 2;
@@ -554,45 +555,115 @@
 	return 0;
 }
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * The configuration for a particular BTS hardware implementation.
+ */
+struct bts_configuration {
+	/* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
+	unsigned char  sizeof_bts;
+	/* the size of a field in the BTS record in bytes */
+	unsigned char  sizeof_field;
+	/* a bitmask to enable/disable BTS in DEBUGCTL MSR */
+	unsigned long debugctl_mask;
+};
+static struct bts_configuration bts_cfg;
 
-static int ptrace_bts_get_size(struct task_struct *child)
+#define BTS_MAX_RECORD_SIZE (8 * 3)
+
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ */
+
+enum bts_field {
+	bts_from = 0,
+	bts_to,
+	bts_flags,
+
+	bts_escape = (unsigned long)-1,
+	bts_qual = bts_to,
+	bts_jiffies = bts_flags
+};
+
+static inline unsigned long bts_get(const char *base, enum bts_field field)
 {
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
-
-	return ds_get_bts_index((void *)child->thread.ds_area_msr);
+	base += (bts_cfg.sizeof_field * field);
+	return *(unsigned long *)base;
 }
 
-static int ptrace_bts_read_record(struct task_struct *child,
-				  long index,
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+{
+	base += (bts_cfg.sizeof_field * field);;
+	(*(unsigned long *)base) = val;
+}
+
+/*
+ * Translate a BTS record from the raw format into the bts_struct format
+ *
+ * out (out): bts_struct interpretation
+ * raw: raw BTS record
+ */
+static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
+{
+	memset(out, 0, sizeof(*out));
+	if (bts_get(raw, bts_from) == bts_escape) {
+		out->qualifier       = bts_get(raw, bts_qual);
+		out->variant.jiffies = bts_get(raw, bts_jiffies);
+	} else {
+		out->qualifier = BTS_BRANCH;
+		out->variant.lbr.from_ip = bts_get(raw, bts_from);
+		out->variant.lbr.to_ip   = bts_get(raw, bts_to);
+	}
+}
+
+static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 				  struct bts_struct __user *out)
 {
 	struct bts_struct ret;
-	int retval;
-	int bts_end;
-	int bts_index;
+	const void *bts_record;
+	size_t bts_index, bts_end;
+	int error;
 
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
+	error = ds_get_bts_end(child, &bts_end);
+	if (error < 0)
+		return error;
 
-	if (index < 0)
-		return -EINVAL;
-
-	bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
 	if (bts_end <= index)
 		return -EINVAL;
 
-	/* translate the ptrace bts index into the ds bts index */
-	bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
-	bts_index -= (index + 1);
-	if (bts_index < 0)
-		bts_index += bts_end;
+	error = ds_get_bts_index(child, &bts_index);
+	if (error < 0)
+		return error;
 
-	retval = ds_read_bts((void *)child->thread.ds_area_msr,
-			     bts_index, &ret);
-	if (retval < 0)
-		return retval;
+	/* translate the ptrace bts index into the ds bts index */
+	bts_index += bts_end - (index + 1);
+	if (bts_end <= bts_index)
+		bts_index -= bts_end;
+
+	error = ds_access_bts(child, bts_index, &bts_record);
+	if (error < 0)
+		return error;
+
+	ptrace_bts_translate_record(&ret, bts_record);
 
 	if (copy_to_user(out, &ret, sizeof(ret)))
 		return -EFAULT;
@@ -600,101 +671,106 @@
 	return sizeof(ret);
 }
 
-static int ptrace_bts_clear(struct task_struct *child)
-{
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
-
-	return ds_clear((void *)child->thread.ds_area_msr);
-}
-
 static int ptrace_bts_drain(struct task_struct *child,
 			    long size,
 			    struct bts_struct __user *out)
 {
-	int end, i;
-	void *ds = (void *)child->thread.ds_area_msr;
+	struct bts_struct ret;
+	const unsigned char *raw;
+	size_t end, i;
+	int error;
 
-	if (!ds)
-		return -ENXIO;
-
-	end = ds_get_bts_index(ds);
-	if (end <= 0)
-		return end;
+	error = ds_get_bts_index(child, &end);
+	if (error < 0)
+		return error;
 
 	if (size < (end * sizeof(struct bts_struct)))
 		return -EIO;
 
-	for (i = 0; i < end; i++, out++) {
-		struct bts_struct ret;
-		int retval;
+	error = ds_access_bts(child, 0, (const void **)&raw);
+	if (error < 0)
+		return error;
 
-		retval = ds_read_bts(ds, i, &ret);
-		if (retval < 0)
-			return retval;
+	for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
+		ptrace_bts_translate_record(&ret, raw);
 
 		if (copy_to_user(out, &ret, sizeof(ret)))
 			return -EFAULT;
 	}
 
-	ds_clear(ds);
+	error = ds_clear_bts(child);
+	if (error < 0)
+		return error;
 
 	return end;
 }
 
+static void ptrace_bts_ovfl(struct task_struct *child)
+{
+	send_sig(child->thread.bts_ovfl_signal, child, 0);
+}
+
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
 {
 	struct ptrace_bts_config cfg;
-	int bts_size, ret = 0;
-	void *ds;
+	int error = 0;
 
+	error = -EOPNOTSUPP;
+	if (!bts_cfg.sizeof_bts)
+		goto errout;
+
+	error = -EIO;
 	if (cfg_size < sizeof(cfg))
-		return -EIO;
+		goto errout;
 
+	error = -EFAULT;
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
-		return -EFAULT;
+		goto errout;
 
-	if ((int)cfg.size < 0)
-		return -EINVAL;
+	error = -EINVAL;
+	if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
+	    !(cfg.flags & PTRACE_BTS_O_ALLOC))
+		goto errout;
 
-	bts_size = 0;
-	ds = (void *)child->thread.ds_area_msr;
-	if (ds) {
-		bts_size = ds_get_bts_size(ds);
-		if (bts_size < 0)
-			return bts_size;
-	}
-	cfg.size = PAGE_ALIGN(cfg.size);
+	if (cfg.flags & PTRACE_BTS_O_ALLOC) {
+		ds_ovfl_callback_t ovfl = NULL;
+		unsigned int sig = 0;
 
-	if (bts_size != cfg.size) {
-		ret = ptrace_bts_realloc(child, cfg.size,
-					 cfg.flags & PTRACE_BTS_O_CUT_SIZE);
-		if (ret < 0)
+		/* we ignore the error in case we were not tracing child */
+		(void)ds_release_bts(child);
+
+		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+			if (!cfg.signal)
+				goto errout;
+
+			sig  = cfg.signal;
+			ovfl = ptrace_bts_ovfl;
+		}
+
+		error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
+		if (error < 0)
 			goto errout;
 
-		ds = (void *)child->thread.ds_area_msr;
+		child->thread.bts_ovfl_signal = sig;
 	}
 
-	if (cfg.flags & PTRACE_BTS_O_SIGNAL)
-		ret = ds_set_overflow(ds, DS_O_SIGNAL);
-	else
-		ret = ds_set_overflow(ds, DS_O_WRAP);
-	if (ret < 0)
+	error = -EINVAL;
+	if (!child->thread.ds_ctx && cfg.flags)
 		goto errout;
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
-		child->thread.debugctlmsr |= ds_debugctl_mask();
+		child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
 	else
-		child->thread.debugctlmsr &= ~ds_debugctl_mask();
+		child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
 
 	if (cfg.flags & PTRACE_BTS_O_SCHED)
 		set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 	else
 		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 
-	ret = sizeof(cfg);
+	error = sizeof(cfg);
 
 out:
 	if (child->thread.debugctlmsr)
@@ -702,10 +778,10 @@
 	else
 		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 
-	return ret;
+	return error;
 
 errout:
-	child->thread.debugctlmsr &= ~ds_debugctl_mask();
+	child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
 	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 	goto out;
 }
@@ -714,119 +790,79 @@
 			     long cfg_size,
 			     struct ptrace_bts_config __user *ucfg)
 {
-	void *ds = (void *)child->thread.ds_area_msr;
 	struct ptrace_bts_config cfg;
+	size_t end;
+	const void *base, *max;
+	int error;
 
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
+	error = ds_get_bts_end(child, &end);
+	if (error < 0)
+		return error;
+
+	error = ds_access_bts(child, /* index = */ 0, &base);
+	if (error < 0)
+		return error;
+
+	error = ds_access_bts(child, /* index = */ end, &max);
+	if (error < 0)
+		return error;
+
 	memset(&cfg, 0, sizeof(cfg));
-
-	if (ds) {
-		cfg.size = ds_get_bts_size(ds);
-
-		if (ds_get_overflow(ds) == DS_O_SIGNAL)
-			cfg.flags |= PTRACE_BTS_O_SIGNAL;
-
-		if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
-		    child->thread.debugctlmsr & ds_debugctl_mask())
-			cfg.flags |= PTRACE_BTS_O_TRACE;
-
-		if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
-			cfg.flags |= PTRACE_BTS_O_SCHED;
-	}
-
+	cfg.size = (max - base);
+	cfg.signal = child->thread.bts_ovfl_signal;
 	cfg.bts_size = sizeof(struct bts_struct);
 
+	if (cfg.signal)
+		cfg.flags |= PTRACE_BTS_O_SIGNAL;
+
+	if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+	    child->thread.debugctlmsr & bts_cfg.debugctl_mask)
+		cfg.flags |= PTRACE_BTS_O_TRACE;
+
+	if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+		cfg.flags |= PTRACE_BTS_O_SCHED;
+
 	if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
 		return -EFAULT;
 
 	return sizeof(cfg);
 }
 
-
 static int ptrace_bts_write_record(struct task_struct *child,
 				   const struct bts_struct *in)
 {
-	int retval;
+	unsigned char bts_record[BTS_MAX_RECORD_SIZE];
 
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
+	BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
 
-	retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
-	if (retval)
-		return retval;
+	memset(bts_record, 0, bts_cfg.sizeof_bts);
+	switch (in->qualifier) {
+	case BTS_INVALID:
+		break;
 
-	return sizeof(*in);
-}
+	case BTS_BRANCH:
+		bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
+		bts_set(bts_record, bts_to,   in->variant.lbr.to_ip);
+		break;
 
-static int ptrace_bts_realloc(struct task_struct *child,
-			      int size, int reduce_size)
-{
-	unsigned long rlim, vm;
-	int ret, old_size;
+	case BTS_TASK_ARRIVES:
+	case BTS_TASK_DEPARTS:
+		bts_set(bts_record, bts_from,    bts_escape);
+		bts_set(bts_record, bts_qual,    in->qualifier);
+		bts_set(bts_record, bts_jiffies, in->variant.jiffies);
+		break;
 
-	if (size < 0)
+	default:
 		return -EINVAL;
-
-	old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
-	if (old_size < 0)
-		return old_size;
-
-	ret = ds_free((void **)&child->thread.ds_area_msr);
-	if (ret < 0)
-		goto out;
-
-	size >>= PAGE_SHIFT;
-	old_size >>= PAGE_SHIFT;
-
-	current->mm->total_vm  -= old_size;
-	current->mm->locked_vm -= old_size;
-
-	if (size == 0)
-		goto out;
-
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->total_vm  + size;
-	if (rlim < vm) {
-		ret = -ENOMEM;
-
-		if (!reduce_size)
-			goto out;
-
-		size = rlim - current->mm->total_vm;
-		if (size <= 0)
-			goto out;
 	}
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->locked_vm  + size;
-	if (rlim < vm) {
-		ret = -ENOMEM;
-
-		if (!reduce_size)
-			goto out;
-
-		size = rlim - current->mm->locked_vm;
-		if (size <= 0)
-			goto out;
-	}
-
-	ret = ds_allocate((void **)&child->thread.ds_area_msr,
-			  size << PAGE_SHIFT);
-	if (ret < 0)
-		goto out;
-
-	current->mm->total_vm  += size;
-	current->mm->locked_vm += size;
-
-out:
-	if (child->thread.ds_area_msr)
-		set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-	else
-		clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
-	return ret;
+	/* The writing task will be the switched-to task on a context
+	 * switch. It needs to write into the switched-from task's BTS
+	 * buffer. */
+	return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
 }
 
 void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -839,7 +875,66 @@
 
 	ptrace_bts_write_record(tsk, &rec);
 }
-#endif /* X86_BTS */
+
+static const struct bts_configuration bts_cfg_netburst = {
+	.sizeof_bts    = sizeof(long) * 3,
+	.sizeof_field  = sizeof(long),
+	.debugctl_mask = (1<<2)|(1<<3)|(1<<5)
+};
+
+static const struct bts_configuration bts_cfg_pentium_m = {
+	.sizeof_bts    = sizeof(long) * 3,
+	.sizeof_field  = sizeof(long),
+	.debugctl_mask = (1<<6)|(1<<7)
+};
+
+static const struct bts_configuration bts_cfg_core2 = {
+	.sizeof_bts    = 8 * 3,
+	.sizeof_field  = 8,
+	.debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+};
+
+static inline void bts_configure(const struct bts_configuration *cfg)
+{
+	bts_cfg = *cfg;
+}
+
+void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
+{
+	switch (c->x86) {
+	case 0x6:
+		switch (c->x86_model) {
+		case 0xD:
+		case 0xE: /* Pentium M */
+			bts_configure(&bts_cfg_pentium_m);
+			break;
+		case 0xF: /* Core2 */
+        case 0x1C: /* Atom */
+			bts_configure(&bts_cfg_core2);
+			break;
+		default:
+			/* sorry, don't know about them */
+			break;
+		}
+		break;
+	case 0xF:
+		switch (c->x86_model) {
+		case 0x0:
+		case 0x1:
+		case 0x2: /* Netburst */
+			bts_configure(&bts_cfg_netburst);
+			break;
+		default:
+			/* sorry, don't know about them */
+			break;
+		}
+		break;
+	default:
+		/* sorry, don't know about them */
+		break;
+	}
+}
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 /*
  * Called by kernel/ptrace.c when detaching..
@@ -852,15 +947,15 @@
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-	if (child->thread.ds_area_msr) {
-#ifdef X86_BTS
-		ptrace_bts_realloc(child, 0, 0);
-#endif
-		child->thread.debugctlmsr &= ~ds_debugctl_mask();
-		if (!child->thread.debugctlmsr)
-			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-	}
+#ifdef CONFIG_X86_PTRACE_BTS
+	(void)ds_release_bts(child);
+
+	child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+	if (!child->thread.debugctlmsr)
+		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+
+	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+#endif /* CONFIG_X86_PTRACE_BTS */
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -980,7 +1075,7 @@
 	/*
 	 * These bits need more cooking - not enabled yet:
 	 */
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
 	case PTRACE_BTS_CONFIG:
 		ret = ptrace_bts_config
 			(child, data, (struct ptrace_bts_config __user *)addr);
@@ -992,7 +1087,7 @@
 		break;
 
 	case PTRACE_BTS_SIZE:
-		ret = ptrace_bts_get_size(child);
+		ret = ds_get_bts_index(child, /* pos = */ NULL);
 		break;
 
 	case PTRACE_BTS_GET:
@@ -1001,14 +1096,14 @@
 		break;
 
 	case PTRACE_BTS_CLEAR:
-		ret = ptrace_bts_clear(child);
+		ret = ds_clear_bts(child);
 		break;
 
 	case PTRACE_BTS_DRAIN:
 		ret = ptrace_bts_drain
 			(child, data, (struct bts_struct __user *) addr);
 		break;
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 	default:
 		ret = ptrace_request(child, request, addr, data);
@@ -1375,30 +1470,6 @@
 	force_sig_info(SIGTRAP, &info, tsk);
 }
 
-static void syscall_trace(struct pt_regs *regs)
-{
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-
-#if 0
-	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
-	       current->comm,
-	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-	       current_thread_info()->flags, current->ptrace);
-#endif
-
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
 
 #ifdef CONFIG_X86_32
 # define IS_IA32	1
@@ -1432,8 +1503,9 @@
 	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
 		ret = -1L;
 
-	if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
-		syscall_trace(regs);
+	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+	    tracehook_report_syscall_entry(regs))
+		ret = -1L;
 
 	if (unlikely(current->audit_context)) {
 		if (IS_IA32)
@@ -1459,7 +1531,7 @@
 		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
-		syscall_trace(regs);
+		tracehook_report_syscall_exit(regs, 0);
 
 	/*
 	 * If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1475,6 +1547,6 @@
 	 * system call instruction.
 	 */
 	if (test_thread_flag(TIF_SINGLESTEP) &&
-	    (current->ptrace & PT_PTRACED))
+	    tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
 		send_sigtrap(current, regs, 0);
 }
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 724adfc..f4c93f1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -29,7 +29,11 @@
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-enum reboot_type reboot_type = BOOT_KBD;
+/*
+ * Keyboard reset and triple fault may result in INIT, not RESET, which
+ * doesn't work when we're in vmx root mode.  Try ACPI first.
+ */
+enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9838f25..141efab 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -223,6 +223,9 @@
 #define RAMDISK_LOAD_FLAG		0x4000
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
+#ifdef CONFIG_CMDLINE_BOOL
+static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+#endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
@@ -665,6 +668,19 @@
 	bss_resource.start = virt_to_phys(&__bss_start);
 	bss_resource.end = virt_to_phys(&__bss_stop)-1;
 
+#ifdef CONFIG_CMDLINE_BOOL
+#ifdef CONFIG_CMDLINE_OVERRIDE
+	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+	if (builtin_cmdline[0]) {
+		/* append boot loader cmdline to builtin */
+		strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+		strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+		strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+	}
+#endif
+#endif
+
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 76e305e..0e67f72 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -162,9 +162,16 @@
 			printk(KERN_INFO
 			       "cpu %d has no node %d or node-local memory\n",
 				cpu, node);
+			if (ptr)
+				printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
+					 cpu, __pa(ptr));
 		}
-		else
+		else {
 			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+			if (ptr)
+				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
+					 cpu, node, __pa(ptr));
+		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 72bbb51..8b4956e 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -24,4 +24,9 @@
 	struct ucontext uc;
 	struct siginfo info;
 };
+
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+		sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		sigset_t *set, struct pt_regs *regs);
 #endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6fb5bcd..2a2435d 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/tracehook.h>
 #include <linux/elf.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -26,6 +27,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+#include <asm/syscalls.h>
 
 #include "sigframe.h"
 
@@ -558,8 +560,6 @@
 	 * handler too.
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
 
 	spin_lock_irq(&current->sighand->siglock);
 	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
@@ -568,6 +568,9 @@
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
+	tracehook_signal_handler(sig, info, ka, regs,
+				 test_thread_flag(TIF_SINGLESTEP));
+
 	return 0;
 }
 
@@ -661,5 +664,10 @@
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
 
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
+
 	clear_thread_flag(TIF_IRET);
 }
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ca316b5..694aa88 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -15,17 +15,21 @@
 #include <linux/errno.h>
 #include <linux/wait.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/unistd.h>
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/compiler.h>
+#include <linux/uaccess.h>
+
 #include <asm/processor.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
 #include <asm/mce.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
 #include "sigframe.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -41,11 +45,6 @@
 # define FIX_EFLAGS	__FIX_EFLAGS
 #endif
 
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-               sigset_t *set, struct pt_regs * regs); 
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-            sigset_t *set, struct pt_regs * regs); 
-
 asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
@@ -128,7 +127,7 @@
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+#define COPY(x)		(err |= __get_user(regs->x, &sc->x))
 
 	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
 	COPY(dx); COPY(cx); COPY(ip);
@@ -158,7 +157,7 @@
 	}
 
 	{
-		struct _fpstate __user * buf;
+		struct _fpstate __user *buf;
 		err |= __get_user(buf, &sc->fpstate);
 
 		if (buf) {
@@ -198,7 +197,7 @@
 	current->blocked = set;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
-	
+
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
@@ -208,16 +207,17 @@
 	return ax;
 
 badframe:
-	signal_fault(regs,frame,"sigreturn");
+	signal_fault(regs, frame, "sigreturn");
 	return 0;
-}	
+}
 
 /*
  * Set up a signal frame.
  */
 
 static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+		unsigned long mask, struct task_struct *me)
 {
 	int err = 0;
 
@@ -273,35 +273,35 @@
 }
 
 static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			   sigset_t *set, struct pt_regs * regs)
+			   sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
-	struct _fpstate __user *fp = NULL; 
+	struct _fpstate __user *fp = NULL;
 	int err = 0;
 	struct task_struct *me = current;
 
 	if (used_math()) {
-		fp = get_stack(ka, regs, sizeof(struct _fpstate)); 
+		fp = get_stack(ka, regs, sizeof(struct _fpstate));
 		frame = (void __user *)round_down(
 			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
 
 		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
 			goto give_sigsegv;
 
-		if (save_i387(fp) < 0) 
-			err |= -1; 
+		if (save_i387(fp) < 0)
+			err |= -1;
 	} else
 		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
 
-	if (ka->sa.sa_flags & SA_SIGINFO) { 
+	if (ka->sa.sa_flags & SA_SIGINFO) {
 		err |= copy_siginfo_to_user(&frame->info, info);
 		if (err)
 			goto give_sigsegv;
 	}
-		
+
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
@@ -311,9 +311,9 @@
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
 	err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
-	if (sizeof(*set) == 16) { 
+	if (sizeof(*set) == 16) {
 		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
-		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); 
+		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
 	} else
 		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
@@ -324,7 +324,7 @@
 		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
 	} else {
 		/* could use a vstub here */
-		goto give_sigsegv; 
+		goto give_sigsegv;
 	}
 
 	if (err)
@@ -332,7 +332,7 @@
 
 	/* Set up registers for signal handler */
 	regs->di = sig;
-	/* In case the signal handler was declared without prototypes */ 
+	/* In case the signal handler was declared without prototypes */
 	regs->ax = 0;
 
 	/* This also works for non SA_SIGINFO handlers because they expect the
@@ -355,37 +355,8 @@
 }
 
 /*
- * Return -1L or the syscall number that @regs is executing.
- */
-static long current_syscall(struct pt_regs *regs)
-{
-	/*
-	 * We always sign-extend a -1 value being set here,
-	 * so this is always either -1L or a syscall number.
-	 */
-	return regs->orig_ax;
-}
-
-/*
- * Return a value that is -EFOO if the system call in @regs->orig_ax
- * returned an error.  This only works for @regs from @current.
- */
-static long current_syscall_ret(struct pt_regs *regs)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32))
-		/*
-		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
-		 * and will match correctly in comparisons.
-		 */
-		return (int) regs->ax;
-#endif
-	return regs->ax;
-}
-
-/*
  * OK, we're invoking a handler
- */	
+ */
 
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -394,9 +365,9 @@
 	int ret;
 
 	/* Are we from a system call? */
-	if (current_syscall(regs) >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
-		switch (current_syscall_ret(regs)) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTARTNOHAND:
 			regs->ax = -EINTR;
@@ -429,7 +400,7 @@
 			ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
 		else
 			ret = ia32_setup_frame(sig, ka, oldset, regs);
-	} else 
+	} else
 #endif
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
@@ -453,15 +424,16 @@
 		 * handler too.
 		 */
 		regs->flags &= ~X86_EFLAGS_TF;
-		if (test_thread_flag(TIF_SINGLESTEP))
-			ptrace_notify(SIGTRAP);
 
 		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
 		if (!(ka->sa.sa_flags & SA_NODEFER))
-			sigaddset(&current->blocked,sig);
+			sigaddset(&current->blocked, sig);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
+
+		tracehook_signal_handler(sig, info, ka, regs,
+					 test_thread_flag(TIF_SINGLESTEP));
 	}
 
 	return ret;
@@ -518,9 +490,9 @@
 	}
 
 	/* Did we come from a system call? */
-	if (current_syscall(regs) >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* Restart the system call - no handlers present */
-		switch (current_syscall_ret(regs)) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
@@ -558,17 +530,23 @@
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{ 
-	struct task_struct *me = current; 
+{
+	struct task_struct *me = current;
 	if (show_unhandled_signals && printk_ratelimit()) {
 		printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
-	       me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+	       me->comm, me->pid, where, frame, regs->ip,
+		   regs->sp, regs->orig_ax);
 		print_vma_addr(" in ", regs->ip);
 		printk("\n");
 	}
 
-	force_sig(SIGSEGV, me); 
-} 
+	force_sig(SIGSEGV, me);
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b..45531e3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -88,7 +88,7 @@
 #define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
 #define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
 #else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)      (idle_thread_array[(x)])
 #define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
 #endif
@@ -129,7 +129,7 @@
 static cpumask_t cpu_sibling_setup_map;
 
 /* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
 
@@ -1313,16 +1313,13 @@
 	if (!num_processors)
 		num_processors = 1;
 
-#ifdef CONFIG_HOTPLUG_CPU
 	if (additional_cpus == -1) {
 		if (disabled_cpus > 0)
 			additional_cpus = disabled_cpus;
 		else
 			additional_cpus = 0;
 	}
-#else
-	additional_cpus = 0;
-#endif
+
 	possible = num_processors + additional_cpus;
 	if (possible > NR_CPUS)
 		possible = NR_CPUS;
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7066cb8..1884a8d 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,6 +22,8 @@
 #include <linux/uaccess.h>
 #include <linux/unistd.h>
 
+#include <asm/syscalls.h>
+
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 3b360ef..6bc211a 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -13,15 +13,17 @@
 #include <linux/utsname.h>
 #include <linux/personality.h>
 #include <linux/random.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/ia32.h>
+#include <asm/syscalls.h>
 
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long off)
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+		unsigned long prot, unsigned long flags,
+		unsigned long fd, unsigned long off)
 {
 	long error;
-	struct file * file;
+	struct file *file;
 
 	error = -EINVAL;
 	if (off & ~PAGE_MASK)
@@ -56,9 +58,9 @@
 		   unmapped base down for this case. This can give
 		   conflicts with the heap, but we assume that glibc
 		   malloc knows how to fall back to mmap. Give it 1GB
-		   of playground for now. -AK */ 
-		*begin = 0x40000000; 
-		*end = 0x80000000;		
+		   of playground for now. -AK */
+		*begin = 0x40000000;
+		*end = 0x80000000;
 		if (current->flags & PF_RANDOMIZE) {
 			new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
 			if (new_begin)
@@ -66,9 +68,9 @@
 		}
 	} else {
 		*begin = TASK_UNMAPPED_BASE;
-		*end = TASK_SIZE; 
+		*end = TASK_SIZE;
 	}
-} 
+}
 
 unsigned long
 arch_get_unmapped_area(struct file *filp, unsigned long addr,
@@ -78,11 +80,11 @@
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
 	unsigned long begin, end;
-	
+
 	if (flags & MAP_FIXED)
 		return addr;
 
-	find_start_end(flags, &begin, &end); 
+	find_start_end(flags, &begin, &end);
 
 	if (len > end)
 		return -ENOMEM;
@@ -96,12 +98,12 @@
 	}
 	if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
 	    && len <= mm->cached_hole_size) {
-	        mm->cached_hole_size = 0;
+		mm->cached_hole_size = 0;
 		mm->free_area_cache = begin;
 	}
 	addr = mm->free_area_cache;
-	if (addr < begin) 
-		addr = begin; 
+	if (addr < begin)
+		addr = begin;
 	start_addr = addr;
 
 full_search:
@@ -127,7 +129,7 @@
 			return addr;
 		}
 		if (addr + mm->cached_hole_size < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
+			mm->cached_hole_size = vma->vm_start - addr;
 
 		addr = vma->vm_end;
 	}
@@ -177,7 +179,7 @@
 		vma = find_vma(mm, addr-len);
 		if (!vma || addr <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr-len);
+			return mm->free_area_cache = addr-len;
 	}
 
 	if (mm->mmap_base < len)
@@ -194,7 +196,7 @@
 		vma = find_vma(mm, addr);
 		if (!vma || addr+len <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr);
+			return mm->free_area_cache = addr;
 
 		/* remember the largest hole we saw so far */
 		if (addr + mm->cached_hole_size < vma->vm_start)
@@ -224,13 +226,13 @@
 }
 
 
-asmlinkage long sys_uname(struct new_utsname __user * name)
+asmlinkage long sys_uname(struct new_utsname __user *name)
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof(*name));
 	up_read(&uts_sem);
-	if (personality(current->personality) == PER_LINUX32) 
-		err |= copy_to_user(&name->machine, "i686", 5); 		
+	if (personality(current->personality) == PER_LINUX32)
+		err |= copy_to_user(&name->machine, "i686", 5);
 	return err ? -EFAULT : 0;
 }
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c..3d1be4f 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
 #define __NO_STUBS
 
 #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #include <asm/unistd_64.h>
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 
 typedef void (*sys_call_ptr_t)(void);
 
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index ffe3c66..bbecf8b 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -36,6 +36,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/time.h>
+#include <asm/timer.h>
 
 #include "do_timer.h"
 
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index ab6bf37..6bb7b85 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -10,6 +10,7 @@
 #include <asm/ldt.h>
 #include <asm/processor.h>
 #include <asm/proto.h>
+#include <asm/syscalls.h>
 
 #include "tls.h"
 
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 513caac..7a31f10 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -32,6 +32,8 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/io.h>
 
 #if defined(CONFIG_EDAC)
 #include <linux/edac.h>
@@ -45,9 +47,6 @@
 #include <asm/unwind.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/io.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #include <asm/pda.h>
@@ -85,7 +84,8 @@
 
 void printk_address(unsigned long address, int reliable)
 {
-	printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
+	printk(" [<%016lx>] %s%pS\n",
+			address, reliable ?	"" : "? ", (void *) address);
 }
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -98,7 +98,8 @@
 		[STACKFAULT_STACK - 1] = "#SS",
 		[MCE_STACK - 1] = "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
-		[N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+		[N_EXCEPTION_STACKS ...
+			N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
 #endif
 	};
 	unsigned k;
@@ -163,7 +164,7 @@
 }
 
 /*
- * x86-64 can have up to three kernel stacks: 
+ * x86-64 can have up to three kernel stacks:
  * process stack
  * interrupt stack
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -219,7 +220,7 @@
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
-	unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
+	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
 	unsigned used = 0;
 	struct thread_info *tinfo;
 
@@ -237,7 +238,7 @@
 	if (!bp) {
 		if (task == current) {
 			/* Grab bp right from our regs */
-			asm("movq %%rbp, %0" : "=r" (bp) :);
+			asm("movq %%rbp, %0" : "=r" (bp) : );
 		} else {
 			/* bp is the last reg pushed by switch_to */
 			bp = *(unsigned long *) task->thread.sp;
@@ -339,9 +340,8 @@
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp, char *log_lvl)
 {
-	printk("\nCall Trace:\n");
+	printk("Call Trace:\n");
 	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-	printk("\n");
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
@@ -357,11 +357,15 @@
 	unsigned long *stack;
 	int i;
 	const int cpu = smp_processor_id();
-	unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
-	unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+	unsigned long *irqstack_end =
+		(unsigned long *) (cpu_pda(cpu)->irqstackptr);
+	unsigned long *irqstack =
+		(unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
 
-	// debugging aid: "show_stack(NULL, NULL);" prints the
-	// back trace for this cpu.
+	/*
+	 * debugging aid: "show_stack(NULL, NULL);" prints the
+	 * back trace for this cpu.
+	 */
 
 	if (sp == NULL) {
 		if (task)
@@ -386,6 +390,7 @@
 		printk(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
+	printk("\n");
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
@@ -404,7 +409,7 @@
 
 #ifdef CONFIG_FRAME_POINTER
 	if (!bp)
-		asm("movq %%rbp, %0" : "=r" (bp):);
+		asm("movq %%rbp, %0" : "=r" (bp) : );
 #endif
 
 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
@@ -414,7 +419,6 @@
 		init_utsname()->version);
 	show_trace(NULL, NULL, &stack, bp);
 }
-
 EXPORT_SYMBOL(dump_stack);
 
 void show_registers(struct pt_regs *regs)
@@ -443,7 +447,6 @@
 		printk("Stack: ");
 		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
 				regs->bp, "");
-		printk("\n");
 
 		printk(KERN_EMERG "Code: ");
 
@@ -493,7 +496,7 @@
 	raw_local_irq_save(flags);
 	cpu = smp_processor_id();
 	if (!__raw_spin_trylock(&die_lock)) {
-		if (cpu == die_owner) 
+		if (cpu == die_owner)
 			/* nested oops. should stop eventually */;
 		else
 			__raw_spin_lock(&die_lock);
@@ -638,7 +641,7 @@
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
+asmlinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
 							== NOTIFY_STOP)	\
@@ -648,7 +651,7 @@
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
-asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
+asmlinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
 	siginfo_t info;							\
 	info.si_signo = signr;						\
@@ -683,7 +686,7 @@
 	preempt_conditional_cli(regs);
 }
 
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
+asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
@@ -778,9 +781,10 @@
 }
 
 static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
-	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
+			NOTIFY_STOP)
 		return;
 	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
 		reason);
@@ -882,7 +886,7 @@
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
 	/* Exception from kernel and interrupts are enabled. Move to
- 	   kernel process stack. */
+	   kernel process stack. */
 	else if (eregs->flags & X86_EFLAGS_IF)
 		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
@@ -891,7 +895,7 @@
 }
 
 /* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
+asmlinkage void __kprobes do_debug(struct pt_regs *regs,
 				   unsigned long error_code)
 {
 	struct task_struct *tsk = current;
@@ -1035,7 +1039,7 @@
 
 asmlinkage void bad_intr(void)
 {
-	printk("bad interrupt"); 
+	printk("bad interrupt");
 }
 
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
@@ -1047,7 +1051,7 @@
 
 	conditional_sti(regs);
 	if (!user_mode(regs) &&
-        	kernel_math_error(regs, "kernel simd math error", 19))
+			kernel_math_error(regs, "kernel simd math error", 19))
 		return;
 
 	/*
@@ -1092,7 +1096,7 @@
 	force_sig_info(SIGFPE, &info, task);
 }
 
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
 {
 }
 
@@ -1149,8 +1153,10 @@
 	set_intr_gate(0, &divide_error);
 	set_intr_gate_ist(1, &debug, DEBUG_STACK);
 	set_intr_gate_ist(2, &nmi, NMI_STACK);
- 	set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
-	set_system_gate(4, &overflow); /* int4 can be called from all */
+	/* int3 can be called from all */
+	set_system_gate_ist(3, &int3, DEBUG_STACK);
+	/* int4 can be called from all */
+	set_system_gate(4, &overflow);
 	set_intr_gate(5, &bounds);
 	set_intr_gate(6, &invalid_op);
 	set_intr_gate(7, &device_not_available);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8f98e9d..161bb85 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,7 +104,7 @@
 /*
  * Read TSC and the reference counters. Take care of SMI disturbance
  */
-static u64 tsc_read_refs(u64 *pm, u64 *hpet)
+static u64 tsc_read_refs(u64 *p, int hpet)
 {
 	u64 t1, t2;
 	int i;
@@ -112,9 +112,9 @@
 	for (i = 0; i < MAX_RETRIES; i++) {
 		t1 = get_cycles();
 		if (hpet)
-			*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
 		else
-			*pm = acpi_pm_read_early();
+			*p = acpi_pm_read_early();
 		t2 = get_cycles();
 		if ((t2 - t1) < SMI_TRESHOLD)
 			return t2;
@@ -123,13 +123,59 @@
 }
 
 /*
+ * Calculate the TSC frequency from HPET reference
+ */
+static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
+{
+	u64 tmp;
+
+	if (hpet2 < hpet1)
+		hpet2 += 0x100000000ULL;
+	hpet2 -= hpet1;
+	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+	do_div(tmp, 1000000);
+	do_div(deltatsc, tmp);
+
+	return (unsigned long) deltatsc;
+}
+
+/*
+ * Calculate the TSC frequency from PMTimer reference
+ */
+static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
+{
+	u64 tmp;
+
+	if (!pm1 && !pm2)
+		return ULONG_MAX;
+
+	if (pm2 < pm1)
+		pm2 += (u64)ACPI_PM_OVRRUN;
+	pm2 -= pm1;
+	tmp = pm2 * 1000000000LL;
+	do_div(tmp, PMTMR_TICKS_PER_SEC);
+	do_div(deltatsc, tmp);
+
+	return (unsigned long) deltatsc;
+}
+
+#define CAL_MS		10
+#define CAL_LATCH	(CLOCK_TICK_RATE / (1000 / CAL_MS))
+#define CAL_PIT_LOOPS	1000
+
+#define CAL2_MS		50
+#define CAL2_LATCH	(CLOCK_TICK_RATE / (1000 / CAL2_MS))
+#define CAL2_PIT_LOOPS	5000
+
+
+/*
  * Try to calibrate the TSC against the Programmable
  * Interrupt Timer and return the frequency of the TSC
  * in kHz.
  *
  * Return ULONG_MAX on failure to calibrate.
  */
-static unsigned long pit_calibrate_tsc(void)
+static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
 {
 	u64 tsc, t1, t2, delta;
 	unsigned long tscmin, tscmax;
@@ -144,8 +190,8 @@
 	 * (LSB then MSB) to begin countdown.
 	 */
 	outb(0xb0, 0x43);
-	outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
-	outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
+	outb(latch & 0xff, 0x42);
+	outb(latch >> 8, 0x42);
 
 	tsc = t1 = t2 = get_cycles();
 
@@ -166,31 +212,154 @@
 	/*
 	 * Sanity checks:
 	 *
-	 * If we were not able to read the PIT more than 5000
+	 * If we were not able to read the PIT more than loopmin
 	 * times, then we have been hit by a massive SMI
 	 *
 	 * If the maximum is 10 times larger than the minimum,
 	 * then we got hit by an SMI as well.
 	 */
-	if (pitcnt < 5000 || tscmax > 10 * tscmin)
+	if (pitcnt < loopmin || tscmax > 10 * tscmin)
 		return ULONG_MAX;
 
 	/* Calculate the PIT value */
 	delta = t2 - t1;
-	do_div(delta, 50);
+	do_div(delta, ms);
 	return delta;
 }
 
+/*
+ * This reads the current MSB of the PIT counter, and
+ * checks if we are running on sufficiently fast and
+ * non-virtualized hardware.
+ *
+ * Our expectations are:
+ *
+ *  - the PIT is running at roughly 1.19MHz
+ *
+ *  - each IO is going to take about 1us on real hardware,
+ *    but we allow it to be much faster (by a factor of 10) or
+ *    _slightly_ slower (ie we allow up to a 2us read+counter
+ *    update - anything else implies a unacceptably slow CPU
+ *    or PIT for the fast calibration to work.
+ *
+ *  - with 256 PIT ticks to read the value, we have 214us to
+ *    see the same MSB (and overhead like doing a single TSC
+ *    read per MSB value etc).
+ *
+ *  - We're doing 2 reads per loop (LSB, MSB), and we expect
+ *    them each to take about a microsecond on real hardware.
+ *    So we expect a count value of around 100. But we'll be
+ *    generous, and accept anything over 50.
+ *
+ *  - if the PIT is stuck, and we see *many* more reads, we
+ *    return early (and the next caller of pit_expect_msb()
+ *    then consider it a failure when they don't see the
+ *    next expected value).
+ *
+ * These expectations mean that we know that we have seen the
+ * transition from one expected value to another with a fairly
+ * high accuracy, and we didn't miss any events. We can thus
+ * use the TSC value at the transitions to calculate a pretty
+ * good value for the TSC frequencty.
+ */
+static inline int pit_expect_msb(unsigned char val)
+{
+	int count = 0;
+
+	for (count = 0; count < 50000; count++) {
+		/* Ignore LSB */
+		inb(0x42);
+		if (inb(0x42) != val)
+			break;
+	}
+	return count > 50;
+}
+
+/*
+ * How many MSB values do we want to see? We aim for a
+ * 15ms calibration, which assuming a 2us counter read
+ * error should give us roughly 150 ppm precision for
+ * the calibration.
+ */
+#define QUICK_PIT_MS 15
+#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
+
+static unsigned long quick_pit_calibrate(void)
+{
+	/* Set the Gate high, disable speaker */
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+	/*
+	 * Counter 2, mode 0 (one-shot), binary count
+	 *
+	 * NOTE! Mode 2 decrements by two (and then the
+	 * output is flipped each time, giving the same
+	 * final output frequency as a decrement-by-one),
+	 * so mode 0 is much better when looking at the
+	 * individual counts.
+	 */
+	outb(0xb0, 0x43);
+
+	/* Start at 0xffff */
+	outb(0xff, 0x42);
+	outb(0xff, 0x42);
+
+	if (pit_expect_msb(0xff)) {
+		int i;
+		u64 t1, t2, delta;
+		unsigned char expect = 0xfe;
+
+		t1 = get_cycles();
+		for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) {
+			if (!pit_expect_msb(expect))
+				goto failed;
+		}
+		t2 = get_cycles();
+
+		/*
+		 * Make sure we can rely on the second TSC timestamp:
+		 */
+		if (!pit_expect_msb(expect))
+			goto failed;
+
+		/*
+		 * Ok, if we get here, then we've seen the
+		 * MSB of the PIT decrement QUICK_PIT_ITERATIONS
+		 * times, and each MSB had many hits, so we never
+		 * had any sudden jumps.
+		 *
+		 * As a result, we can depend on there not being
+		 * any odd delays anywhere, and the TSC reads are
+		 * reliable.
+		 *
+		 * kHz = ticks / time-in-seconds / 1000;
+		 * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
+		 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
+		 */
+		delta = (t2 - t1)*PIT_TICK_RATE;
+		do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
+		printk("Fast TSC calibration using PIT\n");
+		return delta;
+	}
+failed:
+	return 0;
+}
 
 /**
  * native_calibrate_tsc - calibrate the tsc on boot
  */
 unsigned long native_calibrate_tsc(void)
 {
-	u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2;
+	u64 tsc1, tsc2, delta, ref1, ref2;
 	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
-	unsigned long flags;
-	int hpet = is_hpet_enabled(), i;
+	unsigned long flags, latch, ms, fast_calibrate;
+	int hpet = is_hpet_enabled(), i, loopmin;
+
+	local_irq_save(flags);
+	fast_calibrate = quick_pit_calibrate();
+	local_irq_restore(flags);
+	if (fast_calibrate)
+		return fast_calibrate;
 
 	/*
 	 * Run 5 calibration loops to get the lowest frequency value
@@ -216,7 +385,13 @@
 	 * calibration delay loop as we have to wait for a certain
 	 * amount of time anyway.
 	 */
-	for (i = 0; i < 5; i++) {
+
+	/* Preset PIT loop values */
+	latch = CAL_LATCH;
+	ms = CAL_MS;
+	loopmin = CAL_PIT_LOOPS;
+
+	for (i = 0; i < 3; i++) {
 		unsigned long tsc_pit_khz;
 
 		/*
@@ -226,16 +401,16 @@
 		 * read the end value.
 		 */
 		local_irq_save(flags);
-		tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
-		tsc_pit_khz = pit_calibrate_tsc();
-		tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+		tsc1 = tsc_read_refs(&ref1, hpet);
+		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
+		tsc2 = tsc_read_refs(&ref2, hpet);
 		local_irq_restore(flags);
 
 		/* Pick the lowest PIT TSC calibration so far */
 		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
 
 		/* hpet or pmtimer available ? */
-		if (!hpet && !pm1 && !pm2)
+		if (!hpet && !ref1 && !ref2)
 			continue;
 
 		/* Check, whether the sampling was disturbed by an SMI */
@@ -243,23 +418,41 @@
 			continue;
 
 		tsc2 = (tsc2 - tsc1) * 1000000LL;
+		if (hpet)
+			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
+		else
+			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
 
-		if (hpet) {
-			if (hpet2 < hpet1)
-				hpet2 += 0x100000000ULL;
-			hpet2 -= hpet1;
-			tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
-			do_div(tsc1, 1000000);
-		} else {
-			if (pm2 < pm1)
-				pm2 += (u64)ACPI_PM_OVRRUN;
-			pm2 -= pm1;
-			tsc1 = pm2 * 1000000000LL;
-			do_div(tsc1, PMTMR_TICKS_PER_SEC);
+		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+
+		/* Check the reference deviation */
+		delta = ((u64) tsc_pit_min) * 100;
+		do_div(delta, tsc_ref_min);
+
+		/*
+		 * If both calibration results are inside a 10% window
+		 * then we can be sure, that the calibration
+		 * succeeded. We break out of the loop right away. We
+		 * use the reference value, as it is more precise.
+		 */
+		if (delta >= 90 && delta <= 110) {
+			printk(KERN_INFO
+			       "TSC: PIT calibration matches %s. %d loops\n",
+			       hpet ? "HPET" : "PMTIMER", i + 1);
+			return tsc_ref_min;
 		}
 
-		do_div(tsc2, tsc1);
-		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+		/*
+		 * Check whether PIT failed more than once. This
+		 * happens in virtualized environments. We need to
+		 * give the virtual PC a slightly longer timeframe for
+		 * the HPET/PMTIMER to make the result precise.
+		 */
+		if (i == 1 && tsc_pit_min == ULONG_MAX) {
+			latch = CAL2_LATCH;
+			ms = CAL2_MS;
+			loopmin = CAL2_PIT_LOOPS;
+		}
 	}
 
 	/*
@@ -270,7 +463,7 @@
 		printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
 
 		/* We don't have an alternative source, disable TSC */
-		if (!hpet && !pm1 && !pm2) {
+		if (!hpet && !ref1 && !ref2) {
 			printk("TSC: No reference (HPET/PMTIMER) available\n");
 			return 0;
 		}
@@ -278,7 +471,7 @@
 		/* The alternative source failed as well, disable TSC */
 		if (tsc_ref_min == ULONG_MAX) {
 			printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
-			       "failed due to SMI disturbance.\n");
+			       "failed.\n");
 			return 0;
 		}
 
@@ -290,44 +483,25 @@
 	}
 
 	/* We don't have an alternative source, use the PIT calibration value */
-	if (!hpet && !pm1 && !pm2) {
+	if (!hpet && !ref1 && !ref2) {
 		printk(KERN_INFO "TSC: Using PIT calibration value\n");
 		return tsc_pit_min;
 	}
 
 	/* The alternative source failed, use the PIT calibration value */
 	if (tsc_ref_min == ULONG_MAX) {
-		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due "
-		       "to SMI disturbance. Using PIT calibration\n");
+		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
+		       "Using PIT calibration\n");
 		return tsc_pit_min;
 	}
 
-	/* Check the reference deviation */
-	delta = ((u64) tsc_pit_min) * 100;
-	do_div(delta, tsc_ref_min);
-
-	/*
-	 * If both calibration results are inside a 5% window, the we
-	 * use the lower frequency of those as it is probably the
-	 * closest estimate.
-	 */
-	if (delta >= 95 && delta <= 105) {
-		printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n",
-		       hpet ? "HPET" : "PMTIMER");
-		printk(KERN_INFO "TSC: using %s calibration value\n",
-		       tsc_pit_min <= tsc_ref_min ? "PIT" :
-		       hpet ? "HPET" : "PMTIMER");
-		return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min;
-	}
-
-	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
-	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
-
 	/*
 	 * The calibration values differ too much. In doubt, we use
 	 * the PIT value as we know that there are PMTIMERs around
-	 * running at double speed.
+	 * running at double speed. At least we let the user know:
 	 */
+	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
+	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
 	printk(KERN_INFO "TSC: Using PIT calibration value\n");
 	return tsc_pit_min;
 }
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 594ef47..61a97e6 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -25,45 +25,31 @@
 #include <asm/visws/cobalt.h>
 #include <asm/visws/piix4.h>
 #include <asm/arch_hooks.h>
+#include <asm/io_apic.h>
 #include <asm/fixmap.h>
 #include <asm/reboot.h>
 #include <asm/setup.h>
 #include <asm/e820.h>
-#include <asm/smp.h>
 #include <asm/io.h>
 
 #include <mach_ipi.h>
 
 #include "mach_apic.h"
 
-#include <linux/init.h>
-#include <linux/smp.h>
-
 #include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
 
-#include <asm/io.h>
-#include <asm/apic.h>
 #include <asm/i8259.h>
 #include <asm/irq_vectors.h>
-#include <asm/visws/cobalt.h>
 #include <asm/visws/lithium.h>
-#include <asm/visws/piix4.h>
 
 #include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 
 extern int no_broadcast;
 
-#include <asm/io.h>
 #include <asm/apic.h>
-#include <asm/arch_hooks.h>
-#include <asm/visws/cobalt.h>
-#include <asm/visws/lithium.h>
 
 char visws_board_type	= -1;
 char visws_board_rev	= -1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 38f566f..4eeb5cf 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/tlbflush.h>
 #include <asm/irq.h>
+#include <asm/syscalls.h>
 
 /*
  * Known problems:
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index edfb09f..8c9ad02 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -393,13 +393,13 @@
 }
 #endif
 
-static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
 {
 	vmi_set_page_type(pfn, VMI_PAGE_L1);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
 }
 
-static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
 {
  	/*
 	 * This call comes in very early, before mem_map is setup.
@@ -410,20 +410,20 @@
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
 }
 
-static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
 {
  	vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
 	vmi_check_page_type(clonepfn, VMI_PAGE_L2);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
 }
 
-static void vmi_release_pte(u32 pfn)
+static void vmi_release_pte(unsigned long pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L1);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
 }
 
-static void vmi_release_pmd(u32 pfn)
+static void vmi_release_pmd(unsigned long pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L2);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index 01b868b..321cf72 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c
@@ -16,37 +16,46 @@
 	rdmsr(rv->msr_no, rv->l, rv->h);
 }
 
-static void __rdmsr_safe_on_cpu(void *info)
+static void __wrmsr_on_cpu(void *info)
 {
 	struct msr_info *rv = info;
 
-	rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
+	wrmsr(rv->msr_no, rv->l, rv->h);
 }
 
-static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
-	int err = 0;
+	int err;
 	struct msr_info rv;
 
 	rv.msr_no = msr_no;
-	if (safe) {
-		err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu,
-					       &rv, 1);
-		err = err ? err : rv.err;
-	} else {
-		err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
-	}
+	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
 	*l = rv.l;
 	*h = rv.h;
 
 	return err;
 }
 
-static void __wrmsr_on_cpu(void *info)
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	rv.l = l;
+	rv.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+	return err;
+}
+
+/* These "safe" variants are slower and should be used when the target MSR
+   may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
 {
 	struct msr_info *rv = info;
 
-	wrmsr(rv->msr_no, rv->l, rv->h);
+	rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
 }
 
 static void __wrmsr_safe_on_cpu(void *info)
@@ -56,45 +65,30 @@
 	rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h);
 }
 
-static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
-	int err = 0;
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+	*l = rv.l;
+	*h = rv.h;
+
+	return err ? err : rv.err;
+}
+
+int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
 	struct msr_info rv;
 
 	rv.msr_no = msr_no;
 	rv.l = l;
 	rv.h = h;
-	if (safe) {
-		err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu,
-					       &rv, 1);
-		err = err ? err : rv.err;
-	} else {
-		err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-	}
+	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
 
-	return err;
-}
-
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	return _wrmsr_on_cpu(cpu, msr_no, l, h, 0);
-}
-
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	return _rdmsr_on_cpu(cpu, msr_no, l, h, 0);
-}
-
-/* These "safe" variants are slower and should be used when the target MSR
-   may not actually exist. */
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	return _wrmsr_on_cpu(cpu, msr_no, l, h, 1);
-}
-
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	return _rdmsr_on_cpu(cpu, msr_no, l, h, 1);
+	return err ? err : rv.err;
 }
 
 EXPORT_SYMBOL(rdmsr_on_cpu);
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 94972e7..82004d2 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -22,7 +22,7 @@
 		"testb %%al,%%al\n\t"
 		"jne 1b"
 		: "=&S" (d0), "=&D" (d1), "=&a" (d2)
-		:"0" (src), "1" (dest) : "memory");
+		: "0" (src), "1" (dest) : "memory");
 	return dest;
 }
 EXPORT_SYMBOL(strcpy);
@@ -42,7 +42,7 @@
 		"stosb\n"
 		"2:"
 		: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
-		:"0" (src), "1" (dest), "2" (count) : "memory");
+		: "0" (src), "1" (dest), "2" (count) : "memory");
 	return dest;
 }
 EXPORT_SYMBOL(strncpy);
@@ -60,7 +60,7 @@
 		"testb %%al,%%al\n\t"
 		"jne 1b"
 		: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-		: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory");
+		: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory");
 	return dest;
 }
 EXPORT_SYMBOL(strcat);
@@ -105,9 +105,9 @@
 		"2:\tsbbl %%eax,%%eax\n\t"
 		"orb $1,%%al\n"
 		"3:"
-		:"=a" (res), "=&S" (d0), "=&D" (d1)
-		:"1" (cs), "2" (ct)
-		:"memory");
+		: "=a" (res), "=&S" (d0), "=&D" (d1)
+		: "1" (cs), "2" (ct)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strcmp);
@@ -130,9 +130,9 @@
 		"3:\tsbbl %%eax,%%eax\n\t"
 		"orb $1,%%al\n"
 		"4:"
-		:"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
-		:"1" (cs), "2" (ct), "3" (count)
-		:"memory");
+		: "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+		: "1" (cs), "2" (ct), "3" (count)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strncmp);
@@ -152,9 +152,9 @@
 		"movl $1,%1\n"
 		"2:\tmovl %1,%0\n\t"
 		"decl %0"
-		:"=a" (res), "=&S" (d0)
-		:"1" (s), "0" (c)
-		:"memory");
+		: "=a" (res), "=&S" (d0)
+		: "1" (s), "0" (c)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strchr);
@@ -169,9 +169,9 @@
 		"scasb\n\t"
 		"notl %0\n\t"
 		"decl %0"
-		:"=c" (res), "=&D" (d0)
-		:"1" (s), "a" (0), "0" (0xffffffffu)
-		:"memory");
+		: "=c" (res), "=&D" (d0)
+		: "1" (s), "a" (0), "0" (0xffffffffu)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strlen);
@@ -189,9 +189,9 @@
 		"je 1f\n\t"
 		"movl $1,%0\n"
 		"1:\tdecl %0"
-		:"=D" (res), "=&c" (d0)
-		:"a" (c), "0" (cs), "1" (count)
-		:"memory");
+		: "=D" (res), "=&c" (d0)
+		: "a" (c), "0" (cs), "1" (count)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(memchr);
@@ -228,9 +228,9 @@
 		"cmpl $-1,%1\n\t"
 		"jne 1b\n"
 		"3:\tsubl %2,%0"
-		:"=a" (res), "=&d" (d0)
-		:"c" (s), "1" (count)
-		:"memory");
+		: "=a" (res), "=&d" (d0)
+		: "c" (s), "1" (count)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strnlen);
diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c
index 42e8a50..8e2d55f 100644
--- a/arch/x86/lib/strstr_32.c
+++ b/arch/x86/lib/strstr_32.c
@@ -23,9 +23,9 @@
 	"jne 1b\n\t"
 	"xorl %%eax,%%eax\n\t"
 	"2:"
-	:"=a" (__res), "=&c" (d0), "=&S" (d1)
-	:"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
-	:"dx", "di");
+	: "=a" (__res), "=&c" (d0), "=&S" (d1)
+	: "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
+	: "dx", "di");
 return __res;
 }
 
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3d31783..3f2cf11 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,13 +10,15 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
+#include <mach_ipi.h>
+
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI	(1)
 #else
 #define DEFAULT_SEND_IPI	(0)
 #endif
 
-int no_broadcast=DEFAULT_SEND_IPI;
+int no_broadcast = DEFAULT_SEND_IPI;
 
 /**
  * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 62fa440..847c164 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -328,7 +328,7 @@
 
 	get_memcfg_numa();
 
-	kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
+	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
 	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
 	do {
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index a20d1fa..e7277cb 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -148,8 +148,8 @@
 	 * we have now. "break" is either changing perms, levels or
 	 * address space marker.
 	 */
-	prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
-	cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);
+	prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
+	cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
 
 	if (!st->level) {
 		/* First entry */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe..8f92cac 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm-generic/sections.h>
+#include <asm/traps.h>
 
 /*
  * Page fault error code bits
@@ -357,8 +358,6 @@
 	return 0;
 }
 
-void do_invalid_op(struct pt_regs *, unsigned long);
-
 static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_F00F_BUG
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 60ec1d0..6b9a935 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -47,6 +47,7 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/smp.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d3746ef..770536e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -225,7 +225,7 @@
 void __init cleanup_highmap(void)
 {
 	unsigned long vaddr = __START_KERNEL_map;
-	unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
+	unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
 	pmd_t *pmd = level2_kernel_pgt;
 	pmd_t *last_pmd = pmd + PTRS_PER_PMD;
 
@@ -451,14 +451,14 @@
 	unsigned long puds, pmds, ptes, tables, start;
 
 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
-	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
+	tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
 	if (direct_gbpages) {
 		unsigned long extra;
 		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
 		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
 	} else
 		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-	tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+	tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
 
 	if (cpu_has_pse) {
 		unsigned long extra;
@@ -466,7 +466,7 @@
 		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	} else
 		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
+	tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
 
 	/*
 	 * RED-PEN putting page tables only on node 0 could
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4b6e6a..cac6da5 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -421,7 +421,7 @@
 	return;
 }
 
-int __initdata early_ioremap_debug;
+static int __initdata early_ioremap_debug;
 
 static int __init early_ioremap_debug_setup(char *str)
 {
@@ -547,7 +547,7 @@
 }
 
 
-int __initdata early_ioremap_nested;
+static int __initdata early_ioremap_nested;
 
 static int __init check_early_ioremap_leak(void)
 {
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a4dd793..cebcbf1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -79,7 +79,7 @@
 		return 0;
 
 	addr = 0x8000;
-	nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
+	nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
 	nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT,
 				      nodemap_size, L1_CACHE_BYTES);
 	if (nodemap_addr == -1UL) {
@@ -176,10 +176,10 @@
 	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
-	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	int nid;
 
-	start = round_up(start, ZONE_ALIGN);
+	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
 	       start, end);
@@ -210,9 +210,9 @@
 	bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
 	nid = phys_to_nid(nodedata_phys);
 	if (nid == nodeid)
-		bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+		bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
 	else
-		bootmap_start = round_up(start, PAGE_SIZE);
+		bootmap_start = roundup(start, PAGE_SIZE);
 	/*
 	 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
 	 * to use that to align to PAGE_SIZE
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 43e2f84..898fad6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -84,7 +84,7 @@
 
 static inline unsigned long highmap_end_pfn(void)
 {
-	return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+	return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
 }
 
 #endif
@@ -906,11 +906,13 @@
 {
 	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
 }
+EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
 }
+EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d503027..86f2ffc 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -63,10 +63,8 @@
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
-static void pgd_ctor(void *p)
+static void pgd_ctor(pgd_t *pgd)
 {
-	pgd_t *pgd = p;
-
 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
 	   references from swapper_pg_dir. */
@@ -87,7 +85,7 @@
 		pgd_list_add(pgd);
 }
 
-static void pgd_dtor(void *pgd)
+static void pgd_dtor(pgd_t *pgd)
 {
 	unsigned long flags; /* can be called from interrupt context */
 
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cab0abb..0951db9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -123,7 +123,8 @@
 	if (!arg)
 		return -EINVAL;
 
-	__VMALLOC_RESERVE = memparse(arg, &arg);
+	/* Add VMALLOC_OFFSET to the parsed value due to vm area guard hole*/
+	__VMALLOC_RESERVE = memparse(arg, &arg) + VMALLOC_OFFSET;
 	return 0;
 }
 early_param("vmalloc", parse_vmalloc);
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 56b4757..43ac5af 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -10,11 +10,12 @@
 
 #include <linux/oprofile.h>
 #include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/nmi.h>
 #include <asm/msr.h>
-#include <asm/ptrace.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
-#include <asm/nmi.h>
+
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -40,7 +41,7 @@
 static inline void setup_num_counters(void)
 {
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2){
+	if (smp_num_siblings == 2) {
 		num_counters = NUM_COUNTERS_HT2;
 		num_controls = NUM_CONTROLS_HT2;
 	}
@@ -86,7 +87,7 @@
 #define CTR_FLAME_2    (1 << 6)
 #define CTR_IQ_5       (1 << 7)
 
-static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
 	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
 	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
 	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
@@ -97,32 +98,32 @@
 	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
 };
 
-#define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
+#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
 
 /* p4 event codes in libop/op_event.h are indices into this table. */
 
 static struct p4_event_binding p4_events[NUM_EVENTS] = {
-	
+
 	{ /* BRANCH_RETIRED */
-		0x05, 0x06, 
+		0x05, 0x06,
 		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
-	
+
 	{ /* MISPRED_BRANCH_RETIRED */
-		0x04, 0x03, 
+		0x04, 0x03,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
-	
+
 	{ /* TC_DELIVER_MODE */
 		0x01, 0x01,
-		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
+		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
 		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
 	},
-	
+
 	{ /* BPU_FETCH_REQUEST */
-		0x00, 0x03, 
+		0x00, 0x03,
 		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
 		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
 	},
@@ -146,7 +147,7 @@
 	},
 
 	{ /* LOAD_PORT_REPLAY */
-		0x02, 0x04, 
+		0x02, 0x04,
 		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
 	},
@@ -170,43 +171,43 @@
 	},
 
 	{ /* BSQ_CACHE_REFERENCE */
-		0x07, 0x0c, 
+		0x07, 0x0c,
 		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
 		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
 	},
 
 	{ /* IOQ_ALLOCATION */
-		0x06, 0x03, 
+		0x06, 0x03,
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { 0, 0 } }
 	},
 
 	{ /* IOQ_ACTIVE_ENTRIES */
-		0x06, 0x1a, 
+		0x06, 0x1a,
 		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
 		  { 0, 0 } }
 	},
 
 	{ /* FSB_DATA_ACTIVITY */
-		0x06, 0x17, 
+		0x06, 0x17,
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
 	},
 
 	{ /* BSQ_ALLOCATION */
-		0x07, 0x05, 
+		0x07, 0x05,
 		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
 		  { 0, 0 } }
 	},
 
 	{ /* BSQ_ACTIVE_ENTRIES */
 		0x07, 0x06,
-		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
 		  { 0, 0 } }
 	},
 
 	{ /* X87_ASSIST */
-		0x05, 0x03, 
+		0x05, 0x03,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
@@ -216,21 +217,21 @@
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* PACKED_SP_UOP */
-		0x01, 0x08, 
+		0x01, 0x08,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* PACKED_DP_UOP */
-		0x01, 0x0c, 
+		0x01, 0x0c,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
 
 	{ /* SCALAR_SP_UOP */
-		0x01, 0x0a, 
+		0x01, 0x0a,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
@@ -242,31 +243,31 @@
 	},
 
 	{ /* 64BIT_MMX_UOP */
-		0x01, 0x02, 
+		0x01, 0x02,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* 128BIT_MMX_UOP */
-		0x01, 0x1a, 
+		0x01, 0x1a,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
 
 	{ /* X87_FP_UOP */
-		0x01, 0x04, 
+		0x01, 0x04,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* X87_SIMD_MOVES_UOP */
-		0x01, 0x2e, 
+		0x01, 0x2e,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* MACHINE_CLEAR */
-		0x05, 0x02, 
+		0x05, 0x02,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
@@ -276,9 +277,9 @@
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
 	},
-  
+
 	{ /* TC_MS_XFER */
-		0x00, 0x05, 
+		0x00, 0x05,
 		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
 		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
 	},
@@ -308,7 +309,7 @@
 	},
 
 	{ /* INSTR_RETIRED */
-		0x04, 0x02, 
+		0x04, 0x02,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
@@ -319,14 +320,14 @@
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
 
-	{ /* UOP_TYPE */    
-		0x02, 0x02, 
+	{ /* UOP_TYPE */
+		0x02, 0x02,
 		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
 		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
 	},
 
 	{ /* RETIRED_MISPRED_BRANCH_TYPE */
-		0x02, 0x05, 
+		0x02, 0x05,
 		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
 		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
 	},
@@ -349,8 +350,8 @@
 #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
 #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
 #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
-#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
+#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
 
 #define CCCR_RESERVED_BITS 0x38030FFF
 #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -360,15 +361,15 @@
 #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
-#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
+#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
 
 
@@ -380,7 +381,7 @@
 #ifdef CONFIG_SMP
 	int cpu = smp_processor_id();
 	return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
-#endif	
+#endif
 	return 0;
 }
 
@@ -395,25 +396,23 @@
 
 static void p4_fill_in_addresses(struct op_msrs * const msrs)
 {
-	unsigned int i; 
+	unsigned int i;
 	unsigned int addr, cccraddr, stag;
 
 	setup_num_counters();
 	stag = get_stagger();
 
 	/* initialize some registers */
-	for (i = 0; i < num_counters; ++i) {
+	for (i = 0; i < num_counters; ++i)
 		msrs->counters[i].addr = 0;
-	}
-	for (i = 0; i < num_controls; ++i) {
+	for (i = 0; i < num_controls; ++i)
 		msrs->controls[i].addr = 0;
-	}
-	
+
 	/* the counter & cccr registers we pay attention to */
 	for (i = 0; i < num_counters; ++i) {
 		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
 		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
-		if (reserve_perfctr_nmi(addr)){
+		if (reserve_perfctr_nmi(addr)) {
 			msrs->counters[i].addr = addr;
 			msrs->controls[i].addr = cccraddr;
 		}
@@ -447,22 +446,22 @@
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
-	
+
 	for (addr = MSR_P4_MS_ESCR0 + stag;
-	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
-	
+
 	for (addr = MSR_P4_IX_ESCR0 + stag;
-	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
 
 	/* there are 2 remaining non-contiguously located ESCRs */
 
-	if (num_counters == NUM_COUNTERS_NON_HT) {		
+	if (num_counters == NUM_COUNTERS_NON_HT) {
 		/* standard non-HT CPUs handle both remaining ESCRs*/
 		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
 			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
@@ -498,20 +497,20 @@
 	unsigned int stag;
 
 	stag = get_stagger();
-	
+
 	/* convert from counter *number* to counter *bit* */
 	counter_bit = 1 << VIRT_CTR(stag, ctr);
-	
+
 	/* find our event binding structure. */
 	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
-		printk(KERN_ERR 
-		       "oprofile: P4 event code 0x%lx out of range\n", 
+		printk(KERN_ERR
+		       "oprofile: P4 event code 0x%lx out of range\n",
 		       counter_config[ctr].event);
 		return;
 	}
-	
+
 	ev = &(p4_events[counter_config[ctr].event - 1]);
-	
+
 	for (i = 0; i < maxbind; i++) {
 		if (ev->bindings[i].virt_counter & counter_bit) {
 
@@ -526,25 +525,24 @@
 				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
 			}
 			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
-			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);			
+			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
 			ESCR_WRITE(escr, high, ev, i);
-		       
+
 			/* modify CCCR */
 			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
 			CCCR_CLEAR(cccr);
 			CCCR_SET_REQUIRED_BITS(cccr);
 			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
-			if (stag == 0) {
+			if (stag == 0)
 				CCCR_SET_PMI_OVF_0(cccr);
-			} else {
+			else
 				CCCR_SET_PMI_OVF_1(cccr);
-			}
 			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
 			return;
 		}
 	}
 
-	printk(KERN_ERR 
+	printk(KERN_ERR
 	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
 	       counter_config[ctr].event, stag, ctr);
 }
@@ -559,14 +557,14 @@
 	stag = get_stagger();
 
 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
-	if (! MISC_PMC_ENABLED_P(low)) {
+	if (!MISC_PMC_ENABLED_P(low)) {
 		printk(KERN_ERR "oprofile: P4 PMC not available\n");
 		return;
 	}
 
 	/* clear the cccrs we will use */
 	for (i = 0 ; i < num_counters ; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_CLEAR(low);
@@ -576,14 +574,14 @@
 
 	/* clear all escrs (including those outside our concern) */
 	for (i = num_counters; i < num_controls; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
 		wrmsr(msrs->controls[i].addr, 0, 0);
 	}
 
 	/* setup all counters */
 	for (i = 0 ; i < num_counters ; ++i) {
-		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
+		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
 			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -603,11 +601,11 @@
 	stag = get_stagger();
 
 	for (i = 0; i < num_counters; ++i) {
-		
-		if (!reset_value[i]) 
+
+		if (!reset_value[i])
 			continue;
 
-		/* 
+		/*
 		 * there is some eccentricity in the hardware which
 		 * requires that we perform 2 extra corrections:
 		 *
@@ -616,24 +614,24 @@
 		 *
 		 * - write the counter back twice to ensure it gets
 		 *   updated properly.
-		 * 
+		 *
 		 * the former seems to be related to extra NMIs happening
 		 * during the current NMI; the latter is reported as errata
 		 * N15 in intel doc 249199-029, pentium 4 specification
 		 * update, though their suggested work-around does not
 		 * appear to solve the problem.
 		 */
-		
+
 		real = VIRT_CTR(stag, i);
 
 		CCCR_READ(low, high, real);
- 		CTR_READ(ctr, high, real);
+		CTR_READ(ctr, high, real);
 		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
 			oprofile_add_sample(regs, i);
- 			CTR_WRITE(reset_value[i], real);
+			CTR_WRITE(reset_value[i], real);
 			CCCR_CLEAR_OVF(low);
 			CCCR_WRITE(low, high, real);
- 			CTR_WRITE(reset_value[i], real);
+			CTR_WRITE(reset_value[i], real);
 		}
 	}
 
@@ -683,15 +681,16 @@
 	int i;
 
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTR_IS_RESERVED(msrs,i))
+		if (CTR_IS_RESERVED(msrs, i))
 			release_perfctr_nmi(msrs->counters[i].addr);
 	}
-	/* some of the control registers are specially reserved in
+	/*
+	 * some of the control registers are specially reserved in
 	 * conjunction with the counter registers (hence the starting offset).
 	 * This saves a few bits.
 	 */
 	for (i = num_counters ; i < num_controls ; ++i) {
-		if (CTRL_IS_RESERVED(msrs,i))
+		if (CTRL_IS_RESERVED(msrs, i))
 			release_evntsel_nmi(msrs->controls[i].addr);
 	}
 }
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 6a0fca7..22e0576 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -580,7 +580,7 @@
 				    unsigned long action, void *hcpu)
 {
 	int cpu = (long)hcpu;
-	switch(action) {
+	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 8e07718..006599d 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1043,35 +1043,44 @@
 		if (io_apic_assign_pci_irqs) {
 			int irq;
 
-			if (pin) {
-				/*
-				 * interrupt pins are numbered starting
-				 * from 1
-				 */
-				pin--;
-				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-					PCI_SLOT(dev->devfn), pin);
-	/*
-	 * Busses behind bridges are typically not listed in the MP-table.
-	 * In this case we have to look up the IRQ based on the parent bus,
-	 * parent slot, and pin number. The SMP code detects such bridged
-	 * busses itself so we should get into this branch reliably.
-	 */
-				if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
-					struct pci_dev *bridge = dev->bus->self;
+			if (!pin)
+				continue;
 
-					pin = (pin + PCI_SLOT(dev->devfn)) % 4;
-					irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
-							PCI_SLOT(bridge->devfn), pin);
-					if (irq >= 0)
-						dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
-							 pci_name(bridge),
-							 'A' + pin, irq);
-				}
-				if (irq >= 0) {
-					dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
-					dev->irq = irq;
-				}
+			/*
+			 * interrupt pins are numbered starting from 1
+			 */
+			pin--;
+			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+				PCI_SLOT(dev->devfn), pin);
+			/*
+			 * Busses behind bridges are typically not listed in the
+			 * MP-table.  In this case we have to look up the IRQ
+			 * based on the parent bus, parent slot, and pin number.
+			 * The SMP code detects such bridged busses itself so we
+			 * should get into this branch reliably.
+			 */
+			if (irq < 0 && dev->bus->parent) {
+				/* go back to the bridge */
+				struct pci_dev *bridge = dev->bus->self;
+				int bus;
+
+				pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+				bus = bridge->bus->number;
+				irq = IO_APIC_get_PCI_irq_vector(bus,
+						PCI_SLOT(bridge->devfn), pin);
+				if (irq >= 0)
+					dev_warn(&dev->dev,
+						"using bridge %s INT %c to "
+							"get IRQ %d\n",
+						 pci_name(bridge),
+						 'A' + pin, irq);
+			}
+			if (irq >= 0) {
+				dev_info(&dev->dev,
+					"PCI->APIC IRQ transform: INT %c "
+						"-> IRQ %d\n",
+					'A' + pin, irq);
+				dev->irq = irq;
 			}
 		}
 #endif
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 4fc7e87..d1e9b53 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -1,5 +1,3 @@
-.text
-
 /*
  * This may not use any stack, nor any variable that is not "NoSave":
  *
@@ -12,17 +10,18 @@
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
-	.text
+.text
 
 ENTRY(swsusp_arch_suspend)
-
 	movl %esp, saved_context_esp
 	movl %ebx, saved_context_ebx
 	movl %ebp, saved_context_ebp
 	movl %esi, saved_context_esi
 	movl %edi, saved_context_edi
-	pushfl ; popl saved_context_eflags
+	pushfl
+	popl saved_context_eflags
 
 	call swsusp_save
 	ret
@@ -59,7 +58,7 @@
 	movl	mmu_cr4_features, %ecx
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
 	movl	%ecx, %edx
-	andl	$~(1<<7), %edx;  # PGE
+	andl	$~(X86_CR4_PGE), %edx
 	movl	%edx, %cr4;  # turn off PGE
 1:
 	movl	%cr3, %eax;  # flush TLB
@@ -74,7 +73,8 @@
 	movl saved_context_esi, %esi
 	movl saved_context_edi, %edi
 
-	pushl saved_context_eflags ; popfl
+	pushl saved_context_eflags
+	popfl
 
 	xorl	%eax, %eax
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a4e201b..7dcd321 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -812,7 +812,7 @@
 
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
+static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 {
 #ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
@@ -822,7 +822,7 @@
 
 /* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(u32 pfn)
+static void xen_release_pte_init(unsigned long pfn)
 {
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
@@ -838,7 +838,7 @@
 
 /* This needs to make sure the new pte page is pinned iff its being
    attached to a pinned pagetable. */
-static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
+static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
 {
 	struct page *page = pfn_to_page(pfn);
 
@@ -856,12 +856,12 @@
 	}
 }
 
-static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PTE);
 }
 
-static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
@@ -909,7 +909,7 @@
 }
 
 /* This should never happen until we're OK to use struct page */
-static void xen_release_ptpage(u32 pfn, unsigned level)
+static void xen_release_ptpage(unsigned long pfn, unsigned level)
 {
 	struct page *page = pfn_to_page(pfn);
 
@@ -923,23 +923,23 @@
 	}
 }
 
-static void xen_release_pte(u32 pfn)
+static void xen_release_pte(unsigned long pfn)
 {
 	xen_release_ptpage(pfn, PT_PTE);
 }
 
-static void xen_release_pmd(u32 pfn)
+static void xen_release_pmd(unsigned long pfn)
 {
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
 #if PAGETABLE_LEVELS == 4
-static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PUD);
 }
 
-static void xen_release_pud(u32 pfn)
+static void xen_release_pud(unsigned long pfn)
 {
 	xen_release_ptpage(pfn, PT_PUD);
 }
diff --git a/block/Makefile b/block/Makefile
index 208000b..bfe7304 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,8 +4,8 @@
 
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
-			blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
-			cmd-filter.o
+			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+			ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index cf4eb0e..71f0abb 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -462,7 +462,7 @@
 			del_timer(&ad->antic_timer);
 		ad->antic_status = ANTIC_FINISHED;
 		/* see as_work_handler */
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(ad->q, &ad->antic_work);
 	}
 }
 
@@ -483,7 +483,7 @@
 		aic = ad->io_context->aic;
 
 		ad->antic_status = ANTIC_FINISHED;
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 
 		if (aic->ttime_samples == 0) {
 			/* process anticipated on has exited or timed out*/
@@ -745,6 +745,14 @@
  */
 static int as_can_anticipate(struct as_data *ad, struct request *rq)
 {
+#if 0 /* disable for now, we need to check tag level as well */
+	/*
+	 * SSD device without seek penalty, disable idling
+	 */
+	if (blk_queue_nonrot(ad->q)) axman
+		return 0;
+#endif
+
 	if (!ad->io_context)
 		/*
 		 * Last request submitted was a write
@@ -844,7 +852,7 @@
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
 		ad->current_batch_expires = jiffies +
 					ad->batch_expire[ad->batch_data_dir];
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 		ad->changed_batch = 0;
 
 		if (ad->batch_data_dir == REQ_SYNC)
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index a09ead1..5c99ff8 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -293,7 +293,7 @@
 	bio->bi_end_io = bio_end_empty_barrier;
 	bio->bi_private = &wait;
 	bio->bi_bdev = bdev;
-	submit_bio(1 << BIO_RW_BARRIER, bio);
+	submit_bio(WRITE_BARRIER, bio);
 
 	wait_for_completion(&wait);
 
@@ -315,3 +315,73 @@
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+
+	bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev:	blockdev to issue discard for
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to discard
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ *    Issue a discard request for the sectors in question. Does not wait.
+ */
+int blkdev_issue_discard(struct block_device *bdev,
+			 sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
+{
+	struct request_queue *q;
+	struct bio *bio;
+	int ret = 0;
+
+	if (bdev->bd_disk == NULL)
+		return -ENXIO;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	if (!q->prepare_discard_fn)
+		return -EOPNOTSUPP;
+
+	while (nr_sects && !ret) {
+		bio = bio_alloc(gfp_mask, 0);
+		if (!bio)
+			return -ENOMEM;
+
+		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_bdev = bdev;
+
+		bio->bi_sector = sector;
+
+		if (nr_sects > q->max_hw_sectors) {
+			bio->bi_size = q->max_hw_sectors << 9;
+			nr_sects -= q->max_hw_sectors;
+			sector += q->max_hw_sectors;
+		} else {
+			bio->bi_size = nr_sects << 9;
+			nr_sects = 0;
+		}
+		bio_get(bio);
+		submit_bio(DISCARD_BARRIER, bio);
+
+		/* Check if it failed immediately */
+		if (bio_flagged(bio, BIO_EOPNOTSUPP))
+			ret = -EOPNOTSUPP;
+		else if (!bio_flagged(bio, BIO_UPTODATE))
+			ret = -EIO;
+		bio_put(bio);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-core.c b/block/blk-core.c
index 2cba5ef..2d053b5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -26,8 +26,6 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
 
@@ -50,27 +48,26 @@
  */
 static struct workqueue_struct *kblockd_workqueue;
 
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
-
 static void drive_stat_acct(struct request *rq, int new_io)
 {
 	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
+	int cpu;
 
 	if (!blk_fs_request(rq) || !rq->rq_disk)
 		return;
 
-	part = get_part(rq->rq_disk, rq->sector);
+	cpu = part_stat_lock();
+	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+
 	if (!new_io)
-		__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
+		part_stat_inc(cpu, part, merges[rw]);
 	else {
-		disk_round_stats(rq->rq_disk);
-		rq->rq_disk->in_flight++;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight++;
-		}
+		part_round_stats(cpu, part);
+		part_inc_in_flight(part);
 	}
+
+	part_stat_unlock();
 }
 
 void blk_queue_congestion_threshold(struct request_queue *q)
@@ -113,7 +110,8 @@
 	memset(rq, 0, sizeof(*rq));
 
 	INIT_LIST_HEAD(&rq->queuelist);
-	INIT_LIST_HEAD(&rq->donelist);
+	INIT_LIST_HEAD(&rq->timeout_list);
+	rq->cpu = -1;
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
@@ -308,7 +306,7 @@
 	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
 
-	kblockd_schedule_work(&q->unplug_work);
+	kblockd_schedule_work(q, &q->unplug_work);
 }
 
 void blk_unplug(struct request_queue *q)
@@ -325,6 +323,21 @@
 }
 EXPORT_SYMBOL(blk_unplug);
 
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+	/*
+	 * one level of recursion is ok and is much faster than kicking
+	 * the unplug handling
+	 */
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
+}
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -339,18 +352,7 @@
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		blk_plug_device(q);
-		kblockd_schedule_work(&q->unplug_work);
-	}
+	blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -408,15 +410,8 @@
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q)) {
-		if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-			q->request_fn(q);
-			queue_flag_clear(QUEUE_FLAG_REENTER, q);
-		} else {
-			blk_plug_device(q);
-			kblockd_schedule_work(&q->unplug_work);
-		}
-	}
+	if (!elv_queue_empty(q))
+		blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -441,6 +436,14 @@
 
 void blk_cleanup_queue(struct request_queue *q)
 {
+	/*
+	 * We know we have process context here, so we can be a little
+	 * cautious and ensure that pending block actions on this device
+	 * are done before moving on. Going into this function, we should
+	 * not have processes doing IO to this device.
+	 */
+	blk_sync_queue(q);
+
 	mutex_lock(&q->sysfs_lock);
 	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 	mutex_unlock(&q->sysfs_lock);
@@ -496,6 +499,8 @@
 	}
 
 	init_timer(&q->unplug_timer);
+	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+	INIT_LIST_HEAD(&q->timeout_list);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
@@ -531,7 +536,7 @@
  *    request queue; this lock will be taken also from interrupt context, so irq
  *    disabling is needed for it.
  *
- *    Function returns a pointer to the initialized request queue, or NULL if
+ *    Function returns a pointer to the initialized request queue, or %NULL if
  *    it didn't succeed.
  *
  * Note:
@@ -569,7 +574,8 @@
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
-	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER);
+	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER |
+				   1 << QUEUE_FLAG_STACKABLE);
 	q->queue_lock		= lock;
 
 	blk_queue_segment_boundary(q, 0xffffffff);
@@ -624,10 +630,6 @@
 
 	blk_rq_init(q, rq);
 
-	/*
-	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
-	 * see bio.h and blkdev.h
-	 */
 	rq->cmd_flags = rw | REQ_ALLOCED;
 
 	if (priv) {
@@ -888,9 +890,11 @@
  */
 void blk_start_queueing(struct request_queue *q)
 {
-	if (!blk_queue_plugged(q))
+	if (!blk_queue_plugged(q)) {
+		if (unlikely(blk_queue_stopped(q)))
+			return;
 		q->request_fn(q);
-	else
+	} else
 		__generic_unplug_device(q);
 }
 EXPORT_SYMBOL(blk_start_queueing);
@@ -907,6 +911,8 @@
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+	blk_delete_timer(rq);
+	blk_clear_rq_complete(rq);
 	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
 
 	if (blk_rq_tagged(rq))
@@ -917,7 +923,7 @@
 EXPORT_SYMBOL(blk_requeue_request);
 
 /**
- * blk_insert_request - insert a special request in to a request queue
+ * blk_insert_request - insert a special request into a request queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
  * @at_head:	insert request at head or tail of queue
@@ -927,8 +933,8 @@
  *    Many block devices need to execute commands asynchronously, so they don't
  *    block the whole kernel from preemption during request execution.  This is
  *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_SPECIAL in to the corresponding request queue, and letting them be
- *    scheduled for actual execution by the request queue.
+ *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
+ *    be scheduled for actual execution by the request queue.
  *
  *    We have the option of inserting the head or the tail of the queue.
  *    Typically we use the tail for new ioctls and so forth.  We use the head
@@ -982,8 +988,22 @@
 	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
 }
 
-/*
- * disk_round_stats()	- Round off the performance stats on a struct
+static void part_round_stats_single(int cpu, struct hd_struct *part,
+				    unsigned long now)
+{
+	if (now == part->stamp)
+		return;
+
+	if (part->in_flight) {
+		__part_stat_add(cpu, part, time_in_queue,
+				part->in_flight * (now - part->stamp));
+		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
+	}
+	part->stamp = now;
+}
+
+/**
+ * part_round_stats()	- Round off the performance stats on a struct
  * disk_stats.
  *
  * The average IO queue length and utilisation statistics are maintained
@@ -997,36 +1017,15 @@
  * /proc/diskstats.  This accounts immediately for all queue usage up to
  * the current jiffies and restarts the counters again.
  */
-void disk_round_stats(struct gendisk *disk)
+void part_round_stats(int cpu, struct hd_struct *part)
 {
 	unsigned long now = jiffies;
 
-	if (now == disk->stamp)
-		return;
-
-	if (disk->in_flight) {
-		__disk_stat_add(disk, time_in_queue,
-				disk->in_flight * (now - disk->stamp));
-		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
-	}
-	disk->stamp = now;
+	if (part->partno)
+		part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
+	part_round_stats_single(cpu, part, now);
 }
-EXPORT_SYMBOL_GPL(disk_round_stats);
-
-void part_round_stats(struct hd_struct *part)
-{
-	unsigned long now = jiffies;
-
-	if (now == part->stamp)
-		return;
-
-	if (part->in_flight) {
-		__part_stat_add(part, time_in_queue,
-				part->in_flight * (now - part->stamp));
-		__part_stat_add(part, io_ticks, (now - part->stamp));
-	}
-	part->stamp = now;
-}
+EXPORT_SYMBOL_GPL(part_round_stats);
 
 /*
  * queue lock must be held
@@ -1070,6 +1069,7 @@
 
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
+	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
 	/*
@@ -1081,7 +1081,12 @@
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
-	if (unlikely(bio_barrier(bio)))
+	if (unlikely(bio_discard(bio))) {
+		req->cmd_flags |= REQ_DISCARD;
+		if (bio_barrier(bio))
+			req->cmd_flags |= REQ_SOFTBARRIER;
+		req->q->prepare_discard_fn(req->q, req);
+	} else if (unlikely(bio_barrier(bio)))
 		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
@@ -1099,7 +1104,7 @@
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors, barrier, err;
+	int el_ret, nr_sectors, barrier, discard, err;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	int rw_flags;
@@ -1114,7 +1119,14 @@
 	blk_queue_bounce(q, &bio);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && bio_has_data(bio) &&
+	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+		err = -EOPNOTSUPP;
+		goto end_io;
+	}
+
+	discard = bio_discard(bio);
+	if (unlikely(discard) && !q->prepare_discard_fn) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -1138,6 +1150,8 @@
 		req->biotail = bio;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_back_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1165,6 +1179,8 @@
 		req->sector = req->hard_sector = bio->bi_sector;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_front_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1200,13 +1216,15 @@
 	init_request_from_bio(req, bio);
 
 	spin_lock_irq(q->queue_lock);
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = blk_cpu_to_group(smp_processor_id());
 	if (elv_queue_empty(q))
 		blk_plug_device(q);
 	add_request(q, req);
 out:
 	if (sync)
 		__generic_unplug_device(q);
-
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 
@@ -1260,8 +1278,9 @@
 
 static int should_fail_request(struct bio *bio)
 {
-	if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
-	    (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
+	struct hd_struct *part = bio->bi_bdev->bd_part;
+
+	if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
 		return should_fail(&fail_make_request, bio->bi_size);
 
 	return 0;
@@ -1314,7 +1333,7 @@
 }
 
 /**
- * generic_make_request: hand a buffer to its device driver for I/O
+ * generic_make_request - hand a buffer to its device driver for I/O
  * @bio:  The bio describing the location in memory and on the device.
  *
  * generic_make_request() is used to make I/O requests of block
@@ -1409,7 +1428,8 @@
 
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
-		if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+		if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
+		    (bio_discard(bio) && !q->prepare_discard_fn)) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
@@ -1471,13 +1491,13 @@
 EXPORT_SYMBOL(generic_make_request);
 
 /**
- * submit_bio: submit a bio to the block device layer for I/O
+ * submit_bio - submit a bio to the block device layer for I/O
  * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bio: The &struct bio which describes the I/O
  *
  * submit_bio() is very similar in purpose to generic_make_request(), and
  * uses that function to do most of the work. Both are fairly rough
- * interfaces, @bio must be presetup and ready for I/O.
+ * interfaces; @bio must be presetup and ready for I/O.
  *
  */
 void submit_bio(int rw, struct bio *bio)
@@ -1490,11 +1510,7 @@
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
 	 */
-	if (!bio_empty_barrier(bio)) {
-
-		BIO_BUG_ON(!bio->bi_size);
-		BIO_BUG_ON(!bio->bi_io_vec);
-
+	if (bio_has_data(bio)) {
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
@@ -1517,9 +1533,90 @@
 EXPORT_SYMBOL(submit_bio);
 
 /**
+ * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * @q:  the queue
+ * @rq: the request being checked
+ *
+ * Description:
+ *    @rq may have been made based on weaker limitations of upper-level queues
+ *    in request stacking drivers, and it may violate the limitation of @q.
+ *    Since the block layer and the underlying device driver trust @rq
+ *    after it is inserted to @q, it should be checked against @q before
+ *    the insertion using this generic function.
+ *
+ *    This function should also be useful for request stacking drivers
+ *    in some cases below, so export this fuction.
+ *    Request stacking drivers like request-based dm may change the queue
+ *    limits while requests are in the queue (e.g. dm's table swapping).
+ *    Such request stacking drivers should check those requests agaist
+ *    the new queue limits again when they dispatch those requests,
+ *    although such checkings are also done against the old queue limits
+ *    when submitting requests.
+ */
+int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+{
+	if (rq->nr_sectors > q->max_sectors ||
+	    rq->data_len > q->max_hw_sectors << 9) {
+		printk(KERN_ERR "%s: over max size limit.\n", __func__);
+		return -EIO;
+	}
+
+	/*
+	 * queue's settings related to segment counting like q->bounce_pfn
+	 * may differ from that of other stacking queues.
+	 * Recalculate it to check the request correctly on this queue's
+	 * limitation.
+	 */
+	blk_recalc_rq_segments(rq);
+	if (rq->nr_phys_segments > q->max_phys_segments ||
+	    rq->nr_phys_segments > q->max_hw_segments) {
+		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_rq_check_limits);
+
+/**
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @q:  the queue to submit the request
+ * @rq: the request being queued
+ */
+int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+{
+	unsigned long flags;
+
+	if (blk_rq_check_limits(q, rq))
+		return -EIO;
+
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
+	    should_fail(&fail_make_request, blk_rq_bytes(rq)))
+		return -EIO;
+#endif
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	/*
+	 * Submitting request must be dequeued before calling this function
+	 * because it will be linked to another request_queue
+	 */
+	BUG_ON(blk_queued_rq(rq));
+
+	drive_stat_acct(rq, 1);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
+
+/**
  * __end_that_request_first - end I/O on a request
  * @req:      the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -1527,8 +1624,8 @@
  *     for the next range of segments (if any) in the cluster.
  *
  * Return:
- *     0 - we are done with this request, call end_that_request_last()
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request, call end_that_request_last()
+ *     %1 - still buffers pending for this request
  **/
 static int __end_that_request_first(struct request *req, int error,
 				    int nr_bytes)
@@ -1539,7 +1636,7 @@
 	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
 
 	/*
-	 * for a REQ_BLOCK_PC request, we want to carry any eventual
+	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
 	 * sense key with us all the way through
 	 */
 	if (!blk_pc_request(req))
@@ -1552,11 +1649,14 @@
 	}
 
 	if (blk_fs_request(req) && req->rq_disk) {
-		struct hd_struct *part = get_part(req->rq_disk, req->sector);
 		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
 
-		all_stat_add(req->rq_disk, part, sectors[rw],
-				nr_bytes >> 9, req->sector);
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
+		part_stat_unlock();
 	}
 
 	total_bytes = bio_nbytes = 0;
@@ -1641,88 +1741,14 @@
 }
 
 /*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
-	struct list_head *cpu_list, local_list;
-
-	local_irq_disable();
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_replace_init(cpu_list, &local_list);
-	local_irq_enable();
-
-	while (!list_empty(&local_list)) {
-		struct request *rq;
-
-		rq = list_entry(local_list.next, struct request, donelist);
-		list_del_init(&rq->donelist);
-		rq->q->softirq_done_fn(rq);
-	}
-}
-
-static int __cpuinit blk_cpu_notify(struct notifier_block *self,
-				    unsigned long action, void *hcpu)
-{
-	/*
-	 * If a CPU goes away, splice its entries to the current CPU
-	 * and trigger a run of the softirq
-	 */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		int cpu = (unsigned long) hcpu;
-
-		local_irq_disable();
-		list_splice_init(&per_cpu(blk_cpu_done, cpu),
-				 &__get_cpu_var(blk_cpu_done));
-		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-		local_irq_enable();
-	}
-
-	return NOTIFY_OK;
-}
-
-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
-	.notifier_call	= blk_cpu_notify,
-};
-
-/**
- * blk_complete_request - end I/O on a request
- * @req:      the request being processed
- *
- * Description:
- *     Ends all I/O on a request. It does not handle partial completions,
- *     unless the driver actually implements this in its completion callback
- *     through requeueing. The actual completion happens out-of-order,
- *     through a softirq handler. The user must have registered a completion
- *     callback through blk_queue_softirq_done().
- **/
-
-void blk_complete_request(struct request *req)
-{
-	struct list_head *cpu_list;
-	unsigned long flags;
-
-	BUG_ON(!req->q->softirq_done_fn);
-
-	local_irq_save(flags);
-
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_add_tail(&req->donelist, cpu_list);
-	raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_complete_request);
-
-/*
  * queue lock must be held
  */
 static void end_that_request_last(struct request *req, int error)
 {
 	struct gendisk *disk = req->rq_disk;
 
+	blk_delete_timer(req);
+
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
@@ -1740,16 +1766,18 @@
 	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
-		struct hd_struct *part = get_part(disk, req->sector);
+		struct hd_struct *part;
+		int cpu;
 
-		__all_stat_inc(disk, part, ios[rw], req->sector);
-		__all_stat_add(disk, part, ticks[rw], duration, req->sector);
-		disk_round_stats(disk);
-		disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight--;
-		}
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(disk, req->sector);
+
+		part_stat_inc(cpu, part, ios[rw]);
+		part_stat_add(cpu, part, ticks[rw], duration);
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
 	}
 
 	if (req->end_io)
@@ -1762,17 +1790,6 @@
 	}
 }
 
-static inline void __end_request(struct request *rq, int uptodate,
-				 unsigned int nr_bytes)
-{
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_request(rq, error, nr_bytes);
-}
-
 /**
  * blk_rq_bytes - Returns bytes left to complete in the entire request
  * @rq: the request being processed
@@ -1803,92 +1820,36 @@
 EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
 
 /**
- * end_queued_request - end all I/O on a queued request
- * @rq:		the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
- *
- * Description:
- *     Ends all I/O on a request, and removes it from the block layer queues.
- *     Not suitable for normal IO completion, unless the driver still has
- *     the request attached to the block layer.
- *
- **/
-void end_queued_request(struct request *rq, int uptodate)
-{
-	__end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_queued_request);
-
-/**
- * end_dequeued_request - end all I/O on a dequeued request
- * @rq:		the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
- *
- * Description:
- *     Ends all I/O on a request. The request must already have been
- *     dequeued using blkdev_dequeue_request(), as is normally the case
- *     for most drivers.
- *
- **/
-void end_dequeued_request(struct request *rq, int uptodate)
-{
-	__end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_dequeued_request);
-
-
-/**
  * end_request - end I/O on the current segment of the request
  * @req:	the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
+ * @uptodate:	error value or %0/%1 uptodate flag
  *
  * Description:
  *     Ends I/O on the current segment of a request. If that is the only
  *     remaining segment, the request is also completed and freed.
  *
- *     This is a remnant of how older block drivers handled IO completions.
- *     Modern drivers typically end IO on the full request in one go, unless
+ *     This is a remnant of how older block drivers handled I/O completions.
+ *     Modern drivers typically end I/O on the full request in one go, unless
  *     they have a residual value to account for. For that case this function
  *     isn't really useful, unless the residual just happens to be the
  *     full current segment. In other words, don't use this function in new
- *     code. Either use end_request_completely(), or the
- *     end_that_request_chunk() (along with end_that_request_last()) for
- *     partial completions.
- *
+ *     code. Use blk_end_request() or __blk_end_request() to end a request.
  **/
 void end_request(struct request *req, int uptodate)
 {
-	__end_request(req, uptodate, req->hard_cur_sectors << 9);
+	int error = 0;
+
+	if (uptodate <= 0)
+		error = uptodate ? uptodate : -EIO;
+
+	__blk_end_request(req, error, req->hard_cur_sectors << 9);
 }
 EXPORT_SYMBOL(end_request);
 
-/**
- * blk_end_io - Generic end_io function to complete a request.
- * @rq:           the request being processed
- * @error:        0 for success, < 0 for error
- * @nr_bytes:     number of bytes to complete @rq
- * @bidi_bytes:   number of bytes to complete @rq->next_rq
- * @drv_callback: function called between completion of bios in the request
- *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
- *                completion of the request.
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- * Return:
- *     0 - we are done with this request
- *     1 - this request is not freed yet, it still has pending buffers.
- **/
-static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
-		      unsigned int bidi_bytes,
-		      int (drv_callback)(struct request *))
+static int end_that_request_data(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
-	struct request_queue *q = rq->q;
-	unsigned long flags = 0UL;
-
-	if (blk_fs_request(rq) || blk_pc_request(rq)) {
+	if (rq->bio) {
 		if (__end_that_request_first(rq, error, nr_bytes))
 			return 1;
 
@@ -1898,6 +1859,38 @@
 			return 1;
 	}
 
+	return 0;
+}
+
+/**
+ * blk_end_io - Generic end_io function to complete a request.
+ * @rq:           the request being processed
+ * @error:        %0 for success, < %0 for error
+ * @nr_bytes:     number of bytes to complete @rq
+ * @bidi_bytes:   number of bytes to complete @rq->next_rq
+ * @drv_callback: function called between completion of bios in the request
+ *                and completion of the request.
+ *                If the callback returns non %0, this helper returns without
+ *                completion of the request.
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ * Return:
+ *     %0 - we are done with this request
+ *     %1 - this request is not freed yet, it still has pending buffers.
+ **/
+static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
+		      unsigned int bidi_bytes,
+		      int (drv_callback)(struct request *))
+{
+	struct request_queue *q = rq->q;
+	unsigned long flags = 0UL;
+
+	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
+		return 1;
+
 	/* Special feature for tricky drivers */
 	if (drv_callback && drv_callback(rq))
 		return 1;
@@ -1914,7 +1907,7 @@
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -1922,8 +1915,8 @@
  *     If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
@@ -1934,22 +1927,20 @@
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
  *     Must be called with queue lock held unlike blk_end_request().
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	if (blk_fs_request(rq) || blk_pc_request(rq)) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-	}
+	if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
+		return 1;
 
 	add_disk_randomness(rq->rq_disk);
 
@@ -1962,7 +1953,7 @@
 /**
  * blk_end_bidi_request - Helper function for drivers to complete bidi request.
  * @rq:         the bidi request being processed
- * @error:      0 for success, < 0 for error
+ * @error:      %0 for success, < %0 for error
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -1970,8 +1961,8 @@
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 			 unsigned int bidi_bytes)
@@ -1981,13 +1972,43 @@
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
 /**
+ * blk_update_request - Special helper function for request stacking drivers
+ * @rq:           the request being processed
+ * @error:        %0 for success, < %0 for error
+ * @nr_bytes:     number of bytes to complete @rq
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq, but doesn't complete
+ *     the request structure even if @rq doesn't have leftover.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ *     This special helper function is only for request stacking drivers
+ *     (e.g. request-based dm) so that they can handle partial completion.
+ *     Actual device drivers should use blk_end_request instead.
+ */
+void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+	if (!end_that_request_data(rq, error, nr_bytes, 0)) {
+		/*
+		 * These members are not updated in end_that_request_data()
+		 * when all bios are completed.
+		 * Update them so that the request stacking driver can find
+		 * how many bytes remain in the request later.
+		 */
+		rq->nr_sectors = rq->hard_nr_sectors = 0;
+		rq->current_nr_sectors = rq->hard_cur_sectors = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(blk_update_request);
+
+/**
  * blk_end_request_callback - Special helper function for tricky drivers
  * @rq:           the request being processed
- * @error:        0 for success, < 0 for error
+ * @error:        %0 for success, < %0 for error
  * @nr_bytes:     number of bytes to complete
  * @drv_callback: function called between completion of bios in the request
  *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
+ *                If the callback returns non %0, this helper returns without
  *                completion of the request.
  *
  * Description:
@@ -2000,10 +2021,10 @@
  *     Don't use this interface in other places anymore.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - this request is not freed yet.
- *         this request still has pending buffers or
- *         the driver doesn't want to finish this request yet.
+ *     %0 - we are done with this request
+ *     %1 - this request is not freed yet.
+ *          this request still has pending buffers or
+ *          the driver doesn't want to finish this request yet.
  **/
 int blk_end_request_callback(struct request *rq, int error,
 			     unsigned int nr_bytes,
@@ -2016,15 +2037,17 @@
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
-	/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
+	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
+	   we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
 	rq->cmd_flags |= (bio->bi_rw & 3);
 
-	rq->nr_phys_segments = bio_phys_segments(q, bio);
-	rq->nr_hw_segments = bio_hw_segments(q, bio);
+	if (bio_has_data(bio)) {
+		rq->nr_phys_segments = bio_phys_segments(q, bio);
+		rq->buffer = bio_data(bio);
+	}
 	rq->current_nr_sectors = bio_cur_sectors(bio);
 	rq->hard_cur_sectors = rq->current_nr_sectors;
 	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
-	rq->buffer = bio_data(bio);
 	rq->data_len = bio->bi_size;
 
 	rq->bio = rq->biotail = bio;
@@ -2033,7 +2056,35 @@
 		rq->rq_disk = bio->bi_bdev->bd_disk;
 }
 
-int kblockd_schedule_work(struct work_struct *work)
+/**
+ * blk_lld_busy - Check if underlying low-level drivers of a device are busy
+ * @q : the queue of the device being checked
+ *
+ * Description:
+ *    Check if underlying low-level drivers of a device are busy.
+ *    If the drivers want to export their busy state, they must set own
+ *    exporting function using blk_queue_lld_busy() first.
+ *
+ *    Basically, this function is used only by request stacking drivers
+ *    to stop dispatching requests to underlying devices when underlying
+ *    devices are busy.  This behavior helps more I/O merging on the queue
+ *    of the request stacking driver and prevents I/O throughput regression
+ *    on burst I/O load.
+ *
+ * Return:
+ *    0 - Not busy (The request stacking driver should dispatch request)
+ *    1 - Busy (The request stacking driver should stop dispatching request)
+ */
+int blk_lld_busy(struct request_queue *q)
+{
+	if (q->lld_busy_fn)
+		return q->lld_busy_fn(q);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_lld_busy);
+
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
 }
@@ -2047,8 +2098,6 @@
 
 int __init blk_dev_init(void)
 {
-	int i;
-
 	kblockd_workqueue = create_workqueue("kblockd");
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
@@ -2059,12 +2108,6 @@
 	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
 			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
 
-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
-
-	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
-	register_hotcpu_notifier(&blk_cpu_notifier);
-
 	return 0;
 }
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 9bceff7..6af716d 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
 /**
  * blk_end_sync_rq - executes a completion event on a request
  * @rq: request to complete
- * @error: end io status of the request
+ * @error: end I/O status of the request
  */
 static void blk_end_sync_rq(struct request *rq, int error)
 {
@@ -41,7 +41,7 @@
  * @done:	I/O completion handler
  *
  * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
  *    for execution.  Don't wait for completion.
  */
 void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
@@ -72,7 +72,7 @@
  * @at_head:    insert request at head or tail of queue
  *
  * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
  *    for execution and wait for completion.
  */
 int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 3f1a847..61a8e2f 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -108,51 +108,51 @@
 EXPORT_SYMBOL(blk_rq_map_integrity_sg);
 
 /**
- * blk_integrity_compare - Compare integrity profile of two block devices
- * @b1:		Device to compare
- * @b2:		Device to compare
+ * blk_integrity_compare - Compare integrity profile of two disks
+ * @gd1:	Disk to compare
+ * @gd2:	Disk to compare
  *
  * Description: Meta-devices like DM and MD need to verify that all
  * sub-devices use the same integrity format before advertising to
  * upper layers that they can send/receive integrity metadata.  This
- * function can be used to check whether two block devices have
+ * function can be used to check whether two gendisk devices have
  * compatible integrity formats.
  */
-int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
+int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
 {
-	struct blk_integrity *b1 = bd1->bd_disk->integrity;
-	struct blk_integrity *b2 = bd2->bd_disk->integrity;
+	struct blk_integrity *b1 = gd1->integrity;
+	struct blk_integrity *b2 = gd2->integrity;
 
-	BUG_ON(bd1->bd_disk == NULL);
-	BUG_ON(bd2->bd_disk == NULL);
+	if (!b1 && !b2)
+		return 0;
 
 	if (!b1 || !b2)
-		return 0;
+		return -1;
 
 	if (b1->sector_size != b2->sector_size) {
 		printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
 		       b1->sector_size, b2->sector_size);
 		return -1;
 	}
 
 	if (b1->tuple_size != b2->tuple_size) {
 		printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
 		       b1->tuple_size, b2->tuple_size);
 		return -1;
 	}
 
 	if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
 		printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
 		       b1->tag_size, b2->tag_size);
 		return -1;
 	}
 
 	if (strcmp(b1->name, b2->name)) {
 		printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
 		       b1->name, b2->name);
 		return -1;
 	}
@@ -331,7 +331,8 @@
 			return -1;
 
 		if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
-					 &disk->dev.kobj, "%s", "integrity")) {
+					 &disk_to_dev(disk)->kobj,
+					 "%s", "integrity")) {
 			kmem_cache_free(integrity_cachep, bi);
 			return -1;
 		}
@@ -375,7 +376,7 @@
 
 	kobject_uevent(&bi->kobj, KOBJ_REMOVE);
 	kobject_del(&bi->kobj);
-	kobject_put(&disk->dev.kobj);
 	kmem_cache_free(integrity_cachep, bi);
+	disk->integrity = NULL;
 }
 EXPORT_SYMBOL(blk_integrity_unregister);
diff --git a/block/blk-map.c b/block/blk-map.c
index af37e4a..4849fa3 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -41,10 +41,10 @@
 }
 
 static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
-			     void __user *ubuf, unsigned int len)
+			     struct rq_map_data *map_data, void __user *ubuf,
+			     unsigned int len, int null_mapped, gfp_t gfp_mask)
 {
 	unsigned long uaddr;
-	unsigned int alignment;
 	struct bio *bio, *orig_bio;
 	int reading, ret;
 
@@ -55,15 +55,17 @@
 	 * direct dma. else, set up kernel bounce buffers
 	 */
 	uaddr = (unsigned long) ubuf;
-	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-	if (!(uaddr & alignment) && !(len & alignment))
-		bio = bio_map_user(q, NULL, uaddr, len, reading);
+	if (blk_rq_aligned(q, ubuf, len) && !map_data)
+		bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
 	else
-		bio = bio_copy_user(q, uaddr, len, reading);
+		bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
 
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
+	if (null_mapped)
+		bio->bi_flags |= (1 << BIO_NULL_MAPPED);
+
 	orig_bio = bio;
 	blk_queue_bounce(q, &bio);
 
@@ -85,17 +87,19 @@
 }
 
 /**
- * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
  * @rq:		request structure to fill
+ * @map_data:   pointer to the rq_map_data holding pages (if necessary)
  * @ubuf:	the user buffer
  * @len:	length of user data
+ * @gfp_mask:	memory allocation flags
  *
  * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
  *    a kernel bounce buffer is used.
  *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
  *    still in process context.
  *
  *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -105,16 +109,22 @@
  *    unmapping.
  */
 int blk_rq_map_user(struct request_queue *q, struct request *rq,
-		    void __user *ubuf, unsigned long len)
+		    struct rq_map_data *map_data, void __user *ubuf,
+		    unsigned long len, gfp_t gfp_mask)
 {
 	unsigned long bytes_read = 0;
 	struct bio *bio = NULL;
-	int ret;
+	int ret, null_mapped = 0;
 
 	if (len > (q->max_hw_sectors << 9))
 		return -EINVAL;
-	if (!len || !ubuf)
+	if (!len)
 		return -EINVAL;
+	if (!ubuf) {
+		if (!map_data || rq_data_dir(rq) != READ)
+			return -EINVAL;
+		null_mapped = 1;
+	}
 
 	while (bytes_read != len) {
 		unsigned long map_len, end, start;
@@ -132,7 +142,8 @@
 		if (end - start > BIO_MAX_PAGES)
 			map_len -= PAGE_SIZE;
 
-		ret = __blk_rq_map_user(q, rq, ubuf, map_len);
+		ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
+					null_mapped, gfp_mask);
 		if (ret < 0)
 			goto unmap_rq;
 		if (!bio)
@@ -154,18 +165,20 @@
 EXPORT_SYMBOL(blk_rq_map_user);
 
 /**
- * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
  * @rq:		request to map data to
+ * @map_data:   pointer to the rq_map_data holding pages (if necessary)
  * @iov:	pointer to the iovec
  * @iov_count:	number of elements in the iovec
  * @len:	I/O byte count
+ * @gfp_mask:	memory allocation flags
  *
  * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
  *    a kernel bounce buffer is used.
  *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
  *    still in process context.
  *
  *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -175,7 +188,8 @@
  *    unmapping.
  */
 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
-			struct sg_iovec *iov, int iov_count, unsigned int len)
+			struct rq_map_data *map_data, struct sg_iovec *iov,
+			int iov_count, unsigned int len, gfp_t gfp_mask)
 {
 	struct bio *bio;
 	int i, read = rq_data_dir(rq) == READ;
@@ -193,10 +207,11 @@
 		}
 	}
 
-	if (unaligned || (q->dma_pad_mask & len))
-		bio = bio_copy_user_iov(q, iov, iov_count, read);
+	if (unaligned || (q->dma_pad_mask & len) || map_data)
+		bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
+					gfp_mask);
 	else
-		bio = bio_map_user_iov(q, NULL, iov, iov_count, read);
+		bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
 
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
@@ -216,6 +231,7 @@
 	rq->buffer = rq->data = NULL;
 	return 0;
 }
+EXPORT_SYMBOL(blk_rq_map_user_iov);
 
 /**
  * blk_rq_unmap_user - unmap a request with user data
@@ -224,7 +240,7 @@
  * Description:
  *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
  *    supply the original rq->bio from the blk_rq_map_user() return, since
- *    the io completion may have changed rq->bio.
+ *    the I/O completion may have changed rq->bio.
  */
 int blk_rq_unmap_user(struct bio *bio)
 {
@@ -250,7 +266,7 @@
 EXPORT_SYMBOL(blk_rq_unmap_user);
 
 /**
- * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
  * @rq:		request to fill
  * @kbuf:	the kernel buffer
@@ -264,8 +280,6 @@
 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		    unsigned int len, gfp_t gfp_mask)
 {
-	unsigned long kaddr;
-	unsigned int alignment;
 	int reading = rq_data_dir(rq) == READ;
 	int do_copy = 0;
 	struct bio *bio;
@@ -275,11 +289,7 @@
 	if (!len || !kbuf)
 		return -EINVAL;
 
-	kaddr = (unsigned long)kbuf;
-	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-	do_copy = ((kaddr & alignment) || (len & alignment) ||
-		   object_is_on_stack(kbuf));
-
+	do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf);
 	if (do_copy)
 		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
 	else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5efc9e7..908d3e1 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -11,7 +11,7 @@
 
 void blk_recalc_rq_sectors(struct request *rq, int nsect)
 {
-	if (blk_fs_request(rq)) {
+	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
 		rq->hard_sector += nsect;
 		rq->hard_nr_sectors -= nsect;
 
@@ -41,12 +41,9 @@
 void blk_recalc_rq_segments(struct request *rq)
 {
 	int nr_phys_segs;
-	int nr_hw_segs;
 	unsigned int phys_size;
-	unsigned int hw_size;
 	struct bio_vec *bv, *bvprv = NULL;
 	int seg_size;
-	int hw_seg_size;
 	int cluster;
 	struct req_iterator iter;
 	int high, highprv = 1;
@@ -56,8 +53,8 @@
 		return;
 
 	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
-	hw_seg_size = seg_size = 0;
-	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+	seg_size = 0;
+	phys_size = nr_phys_segs = 0;
 	rq_for_each_segment(bv, rq, iter) {
 		/*
 		 * the trick here is making sure that a high page is never
@@ -66,7 +63,7 @@
 		 */
 		high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
 		if (high || highprv)
-			goto new_hw_segment;
+			goto new_segment;
 		if (cluster) {
 			if (seg_size + bv->bv_len > q->max_segment_size)
 				goto new_segment;
@@ -74,40 +71,19 @@
 				goto new_segment;
 			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
 				goto new_segment;
-			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-				goto new_hw_segment;
 
 			seg_size += bv->bv_len;
-			hw_seg_size += bv->bv_len;
 			bvprv = bv;
 			continue;
 		}
 new_segment:
-		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
-		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-			hw_seg_size += bv->bv_len;
-		else {
-new_hw_segment:
-			if (nr_hw_segs == 1 &&
-			    hw_seg_size > rq->bio->bi_hw_front_size)
-				rq->bio->bi_hw_front_size = hw_seg_size;
-			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
-			nr_hw_segs++;
-		}
-
 		nr_phys_segs++;
 		bvprv = bv;
 		seg_size = bv->bv_len;
 		highprv = high;
 	}
 
-	if (nr_hw_segs == 1 &&
-	    hw_seg_size > rq->bio->bi_hw_front_size)
-		rq->bio->bi_hw_front_size = hw_seg_size;
-	if (hw_seg_size > rq->biotail->bi_hw_back_size)
-		rq->biotail->bi_hw_back_size = hw_seg_size;
 	rq->nr_phys_segments = nr_phys_segs;
-	rq->nr_hw_segments = nr_hw_segs;
 }
 
 void blk_recount_segments(struct request_queue *q, struct bio *bio)
@@ -120,7 +96,6 @@
 	blk_recalc_rq_segments(&rq);
 	bio->bi_next = nxt;
 	bio->bi_phys_segments = rq.nr_phys_segments;
-	bio->bi_hw_segments = rq.nr_hw_segments;
 	bio->bi_flags |= (1 << BIO_SEG_VALID);
 }
 EXPORT_SYMBOL(blk_recount_segments);
@@ -131,13 +106,17 @@
 	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
 		return 0;
 
-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
-		return 0;
 	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
 		return 0;
 
+	if (!bio_has_data(bio))
+		return 1;
+
+	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+		return 0;
+
 	/*
-	 * bio and nxt are contigous in memory, check if the queue allows
+	 * bio and nxt are contiguous in memory; check if the queue allows
 	 * these two to be merged into one
 	 */
 	if (BIO_SEG_BOUNDARY(q, bio, nxt))
@@ -146,22 +125,6 @@
 	return 0;
 }
 
-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
-				 struct bio *nxt)
-{
-	if (!bio_flagged(bio, BIO_SEG_VALID))
-		blk_recount_segments(q, bio);
-	if (!bio_flagged(nxt, BIO_SEG_VALID))
-		blk_recount_segments(q, nxt);
-	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
-	    BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
-		return 0;
-	if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
-		return 0;
-
-	return 1;
-}
-
 /*
  * map a request to scatterlist, return number of sg entries setup. Caller
  * must make sure sg can hold rq->nr_phys_segments entries
@@ -275,10 +238,9 @@
 				    struct request *req,
 				    struct bio *bio)
 {
-	int nr_hw_segs = bio_hw_segments(q, bio);
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
-	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
+	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
 	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
@@ -290,7 +252,6 @@
 	 * This will form the start of a new hw segment.  Bump both
 	 * counters.
 	 */
-	req->nr_hw_segments += nr_hw_segs;
 	req->nr_phys_segments += nr_phys_segs;
 	return 1;
 }
@@ -299,7 +260,6 @@
 		     struct bio *bio)
 {
 	unsigned short max_sectors;
-	int len;
 
 	if (unlikely(blk_pc_request(req)))
 		max_sectors = q->max_hw_sectors;
@@ -316,19 +276,6 @@
 		blk_recount_segments(q, req->biotail);
 	if (!bio_flagged(bio, BIO_SEG_VALID))
 		blk_recount_segments(q, bio);
-	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
-	    && !BIOVEC_VIRT_OVERSIZE(len)) {
-		int mergeable =  ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (req->nr_hw_segments == 1)
-				req->bio->bi_hw_front_size = len;
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}
 
 	return ll_new_hw_segment(q, req, bio);
 }
@@ -337,7 +284,6 @@
 		      struct bio *bio)
 {
 	unsigned short max_sectors;
-	int len;
 
 	if (unlikely(blk_pc_request(req)))
 		max_sectors = q->max_hw_sectors;
@@ -351,23 +297,10 @@
 			q->last_merge = NULL;
 		return 0;
 	}
-	len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
 	if (!bio_flagged(bio, BIO_SEG_VALID))
 		blk_recount_segments(q, bio);
 	if (!bio_flagged(req->bio, BIO_SEG_VALID))
 		blk_recount_segments(q, req->bio);
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
-	    !BIOVEC_VIRT_OVERSIZE(len)) {
-		int mergeable =  ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_front_size = len;
-			if (req->nr_hw_segments == 1)
-				req->biotail->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}
 
 	return ll_new_hw_segment(q, req, bio);
 }
@@ -376,7 +309,6 @@
 				struct request *next)
 {
 	int total_phys_segments;
-	int total_hw_segments;
 
 	/*
 	 * First check if the either of the requests are re-queued
@@ -398,26 +330,11 @@
 	if (total_phys_segments > q->max_phys_segments)
 		return 0;
 
-	total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
-	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
-		int len = req->biotail->bi_hw_back_size +
-				next->bio->bi_hw_front_size;
-		/*
-		 * propagate the combined length to the end of the requests
-		 */
-		if (req->nr_hw_segments == 1)
-			req->bio->bi_hw_front_size = len;
-		if (next->nr_hw_segments == 1)
-			next->biotail->bi_hw_back_size = len;
-		total_hw_segments--;
-	}
-
-	if (total_hw_segments > q->max_hw_segments)
+	if (total_phys_segments > q->max_hw_segments)
 		return 0;
 
 	/* Merge is OK... */
 	req->nr_phys_segments = total_phys_segments;
-	req->nr_hw_segments = total_hw_segments;
 	return 1;
 }
 
@@ -470,17 +387,21 @@
 	elv_merge_requests(q, req, next);
 
 	if (req->rq_disk) {
-		struct hd_struct *part
-			= get_part(req->rq_disk, req->sector);
-		disk_round_stats(req->rq_disk);
-		req->rq_disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight--;
-		}
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
 	}
 
 	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+	if (blk_rq_cpu_valid(next))
+		req->cpu = next->cpu;
 
 	__blk_put_request(q, next);
 	return 1;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index dfc7701..b21dcdb 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -33,6 +33,23 @@
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
 /**
+ * blk_queue_set_discard - set a discard_sectors function for queue
+ * @q:		queue
+ * @dfn:	prepare_discard function
+ *
+ * It's possible for a queue to register a discard callback which is used
+ * to transform a discard request into the appropriate type for the
+ * hardware. If none is registered, then discard requests are failed
+ * with %EOPNOTSUPP.
+ *
+ */
+void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
+{
+	q->prepare_discard_fn = dfn;
+}
+EXPORT_SYMBOL(blk_queue_set_discard);
+
+/**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:		queue
  * @mbfn:	merge_bvec_fn
@@ -60,6 +77,24 @@
 }
 EXPORT_SYMBOL(blk_queue_softirq_done);
 
+void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
+{
+	q->rq_timeout = timeout;
+}
+EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
+
+void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
+{
+	q->rq_timed_out_fn = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
+
+void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
+{
+	q->lld_busy_fn = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
@@ -127,7 +162,7 @@
  *    Different hardware can have different requirements as to what pages
  *    it can do I/O directly to. A low level driver can call
  *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
- *    buffers for doing I/O to pages residing above @page.
+ *    buffers for doing I/O to pages residing above @dma_addr.
  **/
 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
 {
@@ -212,7 +247,7 @@
  * Description:
  *    Enables a low level driver to set an upper limit on the number of
  *    hw data segments in a request.  This would be the largest number of
- *    address/length pairs the host adapter can actually give as once
+ *    address/length pairs the host adapter can actually give at once
  *    to the device.
  **/
 void blk_queue_max_hw_segments(struct request_queue *q,
@@ -393,7 +428,7 @@
  * @mask:  alignment mask
  *
  * description:
- *    set required memory and length aligment for direct dma transactions.
+ *    set required memory and length alignment for direct dma transactions.
  *    this is used when buiding direct io requests for the queue.
  *
  **/
@@ -409,7 +444,7 @@
  * @mask:  alignment mask
  *
  * description:
- *    update required memory and length aligment for direct dma transactions.
+ *    update required memory and length alignment for direct dma transactions.
  *    If the requested alignment is larger than the current alignment, then
  *    the current queue alignment is updated to the new value, otherwise it
  *    is left alone.  The design of this is to allow multiple objects
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
new file mode 100644
index 0000000..e660d26
--- /dev/null
+++ b/block/blk-softirq.c
@@ -0,0 +1,175 @@
+/*
+ * Functions related to softirq rq completions
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+
+#include "blk.h"
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+
+/*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+	struct list_head *cpu_list, local_list;
+
+	local_irq_disable();
+	cpu_list = &__get_cpu_var(blk_cpu_done);
+	list_replace_init(cpu_list, &local_list);
+	local_irq_enable();
+
+	while (!list_empty(&local_list)) {
+		struct request *rq;
+
+		rq = list_entry(local_list.next, struct request, csd.list);
+		list_del_init(&rq->csd.list);
+		rq->q->softirq_done_fn(rq);
+	}
+}
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+static void trigger_softirq(void *data)
+{
+	struct request *rq = data;
+	unsigned long flags;
+	struct list_head *list;
+
+	local_irq_save(flags);
+	list = &__get_cpu_var(blk_cpu_done);
+	list_add_tail(&rq->csd.list, list);
+
+	if (list->next == &rq->csd.list)
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Setup and invoke a run of 'trigger_softirq' on the given cpu.
+ */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+	if (cpu_online(cpu)) {
+		struct call_single_data *data = &rq->csd;
+
+		data->func = trigger_softirq;
+		data->info = rq;
+		data->flags = 0;
+
+		__smp_call_function_single(cpu, data);
+		return 0;
+	}
+
+	return 1;
+}
+#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+	return 1;
+}
+#endif
+
+static int __cpuinit blk_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		int cpu = (unsigned long) hcpu;
+
+		local_irq_disable();
+		list_splice_init(&per_cpu(blk_cpu_done, cpu),
+				 &__get_cpu_var(blk_cpu_done));
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+		local_irq_enable();
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata blk_cpu_notifier = {
+	.notifier_call	= blk_cpu_notify,
+};
+
+void __blk_complete_request(struct request *req)
+{
+	struct request_queue *q = req->q;
+	unsigned long flags;
+	int ccpu, cpu, group_cpu;
+
+	BUG_ON(!q->softirq_done_fn);
+
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	group_cpu = blk_cpu_to_group(cpu);
+
+	/*
+	 * Select completion CPU
+	 */
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+		ccpu = req->cpu;
+	else
+		ccpu = cpu;
+
+	if (ccpu == cpu || ccpu == group_cpu) {
+		struct list_head *list;
+do_local:
+		list = &__get_cpu_var(blk_cpu_done);
+		list_add_tail(&req->csd.list, list);
+
+		/*
+		 * if the list only contains our just added request,
+		 * signal a raise of the softirq. If there are already
+		 * entries there, someone already raised the irq but it
+		 * hasn't run yet.
+		 */
+		if (list->next == &req->csd.list)
+			raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	} else if (raise_blk_irq(ccpu, req))
+		goto do_local;
+
+	local_irq_restore(flags);
+}
+
+/**
+ * blk_complete_request - end I/O on a request
+ * @req:      the request being processed
+ *
+ * Description:
+ *     Ends all I/O on a request. It does not handle partial completions,
+ *     unless the driver actually implements this in its completion callback
+ *     through requeueing. The actual completion happens out-of-order,
+ *     through a softirq handler. The user must have registered a completion
+ *     callback through blk_queue_softirq_done().
+ **/
+void blk_complete_request(struct request *req)
+{
+	if (unlikely(blk_should_fake_timeout(req->q)))
+		return;
+	if (!blk_mark_rq_complete(req))
+		__blk_complete_request(req);
+}
+EXPORT_SYMBOL(blk_complete_request);
+
+__init int blk_softirq_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+
+	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
+	register_hotcpu_notifier(&blk_cpu_notifier);
+	return 0;
+}
+subsys_initcall(blk_softirq_init);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 304ec73..21e275d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -156,6 +156,30 @@
 	return ret;
 }
 
+static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
+{
+	unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+
+	return queue_var_show(set != 0, page);
+}
+
+static ssize_t
+queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
+{
+	ssize_t ret = -EINVAL;
+#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+	unsigned long val;
+
+	ret = queue_var_store(&val, page, count);
+	spin_lock_irq(q->queue_lock);
+	if (val)
+		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_SAME_COMP,  q);
+	spin_unlock_irq(q->queue_lock);
+#endif
+	return ret;
+}
 
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@
 	.store = queue_nomerges_store,
 };
 
+static struct queue_sysfs_entry queue_rq_affinity_entry = {
+	.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_rq_affinity_show,
+	.store = queue_rq_affinity_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -205,6 +235,7 @@
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
 	&queue_nomerges_entry.attr,
+	&queue_rq_affinity_entry.attr,
 	NULL,
 };
 
@@ -310,7 +341,7 @@
 	if (!q->request_fn)
 		return 0;
 
-	ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj),
+	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
 			  "%s", "queue");
 	if (ret < 0)
 		return ret;
@@ -339,6 +370,6 @@
 
 		kobject_uevent(&q->kobj, KOBJ_REMOVE);
 		kobject_del(&q->kobj);
-		kobject_put(&disk->dev.kobj);
+		kobject_put(&disk_to_dev(disk)->kobj);
 	}
 }
diff --git a/block/blk-tag.c b/block/blk-tag.c
index ed5166f..c0d419e 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -29,7 +29,7 @@
  * __blk_free_tags - release a given set of tag maintenance info
  * @bqt:	the tag map to free
  *
- * Tries to free the specified @bqt@.  Returns true if it was
+ * Tries to free the specified @bqt.  Returns true if it was
  * actually freed and false if there are still references using it
  */
 static int __blk_free_tags(struct blk_queue_tag *bqt)
@@ -78,7 +78,7 @@
  * blk_free_tags - release a given set of tag maintenance info
  * @bqt:	the tag map to free
  *
- * For externally managed @bqt@ frees the map.  Callers of this
+ * For externally managed @bqt frees the map.  Callers of this
  * function must guarantee to have released all the queues that
  * might have been using this tag map.
  */
@@ -94,7 +94,7 @@
  * @q:  the request queue for the device
  *
  *  Notes:
- *	This is used to disabled tagged queuing to a device, yet leave
+ *	This is used to disable tagged queuing to a device, yet leave
  *	queue in function.
  **/
 void blk_queue_free_tags(struct request_queue *q)
@@ -271,7 +271,7 @@
  * @rq: the request that has completed
  *
  *  Description:
- *    Typically called when end_that_request_first() returns 0, meaning
+ *    Typically called when end_that_request_first() returns %0, meaning
  *    all transfers have been done for a request. It's important to call
  *    this function before end_that_request_last(), as that will put the
  *    request back on the free list thus corrupting the internal tag list.
@@ -337,6 +337,7 @@
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
+	unsigned max_depth, offset;
 	int tag;
 
 	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -350,10 +351,19 @@
 	/*
 	 * Protect against shared tag maps, as we may not have exclusive
 	 * access to the tag map.
+	 *
+	 * We reserve a few tags just for sync IO, since we don't want
+	 * to starve sync IO on behalf of flooding async IO.
 	 */
+	max_depth = bqt->max_depth;
+	if (rq_is_sync(rq))
+		offset = 0;
+	else
+		offset = max_depth >> 2;
+
 	do {
-		tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
-		if (tag >= bqt->max_depth)
+		tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
+		if (tag >= max_depth)
 			return 1;
 
 	} while (test_and_set_bit_lock(tag, bqt->tag_map));
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
new file mode 100644
index 0000000..972a63f
--- /dev/null
+++ b/block/blk-timeout.c
@@ -0,0 +1,238 @@
+/*
+ * Functions related to generic timeout handling of requests.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/fault-inject.h>
+
+#include "blk.h"
+
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+
+static DECLARE_FAULT_ATTR(fail_io_timeout);
+
+static int __init setup_fail_io_timeout(char *str)
+{
+	return setup_fault_attr(&fail_io_timeout, str);
+}
+__setup("fail_io_timeout=", setup_fail_io_timeout);
+
+int blk_should_fake_timeout(struct request_queue *q)
+{
+	if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
+		return 0;
+
+	return should_fail(&fail_io_timeout, 1);
+}
+
+static int __init fail_io_timeout_debugfs(void)
+{
+	return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
+}
+
+late_initcall(fail_io_timeout_debugfs);
+
+ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);
+
+	return sprintf(buf, "%d\n", set != 0);
+}
+
+ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	int val;
+
+	if (count) {
+		struct request_queue *q = disk->queue;
+		char *p = (char *) buf;
+
+		val = simple_strtoul(p, &p, 10);
+		spin_lock_irq(q->queue_lock);
+		if (val)
+			queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
+		else
+			queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
+		spin_unlock_irq(q->queue_lock);
+	}
+
+	return count;
+}
+
+#endif /* CONFIG_FAIL_IO_TIMEOUT */
+
+/*
+ * blk_delete_timer - Delete/cancel timer for a given function.
+ * @req:	request that we are canceling timer for
+ *
+ */
+void blk_delete_timer(struct request *req)
+{
+	struct request_queue *q = req->q;
+
+	/*
+	 * Nothing to detach
+	 */
+	if (!q->rq_timed_out_fn || !req->deadline)
+		return;
+
+	list_del_init(&req->timeout_list);
+
+	if (list_empty(&q->timeout_list))
+		del_timer(&q->timeout);
+}
+
+static void blk_rq_timed_out(struct request *req)
+{
+	struct request_queue *q = req->q;
+	enum blk_eh_timer_return ret;
+
+	ret = q->rq_timed_out_fn(req);
+	switch (ret) {
+	case BLK_EH_HANDLED:
+		__blk_complete_request(req);
+		break;
+	case BLK_EH_RESET_TIMER:
+		blk_clear_rq_complete(req);
+		blk_add_timer(req);
+		break;
+	case BLK_EH_NOT_HANDLED:
+		/*
+		 * LLD handles this for now but in the future
+		 * we can send a request msg to abort the command
+		 * and we can move more of the generic scsi eh code to
+		 * the blk layer.
+		 */
+		break;
+	default:
+		printk(KERN_ERR "block: bad eh return: %d\n", ret);
+		break;
+	}
+}
+
+void blk_rq_timed_out_timer(unsigned long data)
+{
+	struct request_queue *q = (struct request_queue *) data;
+	unsigned long flags, uninitialized_var(next), next_set = 0;
+	struct request *rq, *tmp;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
+		if (time_after_eq(jiffies, rq->deadline)) {
+			list_del_init(&rq->timeout_list);
+
+			/*
+			 * Check if we raced with end io completion
+			 */
+			if (blk_mark_rq_complete(rq))
+				continue;
+			blk_rq_timed_out(rq);
+		}
+		if (!next_set) {
+			next = rq->deadline;
+			next_set = 1;
+		} else if (time_after(next, rq->deadline))
+			next = rq->deadline;
+	}
+
+	if (next_set && !list_empty(&q->timeout_list))
+		mod_timer(&q->timeout, round_jiffies(next));
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/**
+ * blk_abort_request -- Request request recovery for the specified command
+ * @req:	pointer to the request of interest
+ *
+ * This function requests that the block layer start recovery for the
+ * request by deleting the timer and calling the q's timeout function.
+ * LLDDs who implement their own error recovery MAY ignore the timeout
+ * event if they generated blk_abort_req. Must hold queue lock.
+ */
+void blk_abort_request(struct request *req)
+{
+	if (blk_mark_rq_complete(req))
+		return;
+	blk_delete_timer(req);
+	blk_rq_timed_out(req);
+}
+EXPORT_SYMBOL_GPL(blk_abort_request);
+
+/**
+ * blk_add_timer - Start timeout timer for a single request
+ * @req:	request that is about to start running.
+ *
+ * Notes:
+ *    Each request has its own timer, and as it is added to the queue, we
+ *    set up the timer. When the request completes, we cancel the timer.
+ */
+void blk_add_timer(struct request *req)
+{
+	struct request_queue *q = req->q;
+	unsigned long expiry;
+
+	if (!q->rq_timed_out_fn)
+		return;
+
+	BUG_ON(!list_empty(&req->timeout_list));
+	BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
+
+	if (req->timeout)
+		req->deadline = jiffies + req->timeout;
+	else {
+		req->deadline = jiffies + q->rq_timeout;
+		/*
+		 * Some LLDs, like scsi, peek at the timeout to prevent
+		 * a command from being retried forever.
+		 */
+		req->timeout = q->rq_timeout;
+	}
+	list_add_tail(&req->timeout_list, &q->timeout_list);
+
+	/*
+	 * If the timer isn't already pending or this timeout is earlier
+	 * than an existing one, modify the timer. Round to next nearest
+	 * second.
+	 */
+	expiry = round_jiffies(req->deadline);
+
+	/*
+	 * We use ->deadline == 0 to detect whether a timer was added or
+	 * not, so just increase to next jiffy for that specific case
+	 */
+	if (unlikely(!req->deadline))
+		req->deadline = 1;
+
+	if (!timer_pending(&q->timeout) ||
+	    time_before(expiry, q->timeout.expires))
+		mod_timer(&q->timeout, expiry);
+}
+
+/**
+ * blk_abort_queue -- Abort all request on given queue
+ * @queue:	pointer to queue
+ *
+ */
+void blk_abort_queue(struct request_queue *q)
+{
+	unsigned long flags;
+	struct request *rq, *tmp;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	elv_abort_queue(q);
+
+	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
+		blk_abort_request(rq);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+}
+EXPORT_SYMBOL_GPL(blk_abort_queue);
diff --git a/block/blk.h b/block/blk.h
index c79f30e..e5c5797 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,42 @@
 
 void blk_unplug_work(struct work_struct *work);
 void blk_unplug_timeout(unsigned long data);
+void blk_rq_timed_out_timer(unsigned long data);
+void blk_delete_timer(struct request *);
+void blk_add_timer(struct request *);
+
+/*
+ * Internal atomic flags for request handling
+ */
+enum rq_atomic_flags {
+	REQ_ATOM_COMPLETE = 0,
+};
+
+/*
+ * EH timer and IO completion will both attempt to 'grab' the request, make
+ * sure that only one of them suceeds
+ */
+static inline int blk_mark_rq_complete(struct request *rq)
+{
+	return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
+}
+
+static inline void blk_clear_rq_complete(struct request *rq)
+{
+	clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
+}
+
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+int blk_should_fake_timeout(struct request_queue *);
+ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
+ssize_t part_timeout_store(struct device *, struct device_attribute *,
+				const char *, size_t);
+#else
+static inline int blk_should_fake_timeout(struct request_queue *q)
+{
+	return 0;
+}
+#endif
 
 struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
@@ -59,4 +95,16 @@
 
 #endif /* BLK_DEV_INTEGRITY */
 
+static inline int blk_cpu_to_group(int cpu)
+{
+#ifdef CONFIG_SCHED_MC
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
+	return first_cpu(per_cpu(cpu_sibling_map, cpu));
+#else
+	return cpu;
+#endif
+}
+
 #endif
diff --git a/block/blktrace.c b/block/blktrace.c
index eb9651c..85049a7 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -111,23 +111,9 @@
  */
 static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
 
-/*
- * Bio action bits of interest
- */
-static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
-
-/*
- * More could be added as needed, taking care to increment the decrementer
- * to get correct indexing
- */
-#define trace_barrier_bit(rw)	\
-	(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
-#define trace_sync_bit(rw)	\
-	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
-#define trace_ahead_bit(rw)	\
-	(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
-#define trace_meta_bit(rw)	\
-	(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
+/* The ilog2() calls fall out because they're constant */
+#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
+	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
 
 /*
  * The worker for the various blk_add_trace*() types. Fills out a
@@ -147,10 +133,11 @@
 		return;
 
 	what |= ddir_act[rw & WRITE];
-	what |= bio_act[trace_barrier_bit(rw)];
-	what |= bio_act[trace_sync_bit(rw)];
-	what |= bio_act[trace_ahead_bit(rw)];
-	what |= bio_act[trace_meta_bit(rw)];
+	what |= MASK_TC_BIT(rw, BARRIER);
+	what |= MASK_TC_BIT(rw, SYNC);
+	what |= MASK_TC_BIT(rw, AHEAD);
+	what |= MASK_TC_BIT(rw, META);
+	what |= MASK_TC_BIT(rw, DISCARD);
 
 	pid = tsk->pid;
 	if (unlikely(act_log_check(bt, what, sector, pid)))
@@ -382,7 +369,8 @@
 	if (!buts->buf_size || !buts->buf_nr)
 		return -EINVAL;
 
-	strcpy(buts->name, name);
+	strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
+	buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
 
 	/*
 	 * some device names have larger paths - convert the slashes
diff --git a/block/bsg.c b/block/bsg.c
index 0aae8d7..56cb343 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -283,7 +283,8 @@
 		next_rq->cmd_type = rq->cmd_type;
 
 		dxferp = (void*)(unsigned long)hdr->din_xferp;
-		ret =  blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len);
+		ret =  blk_rq_map_user(q, next_rq, NULL, dxferp,
+				       hdr->din_xfer_len, GFP_KERNEL);
 		if (ret)
 			goto out;
 	}
@@ -298,7 +299,8 @@
 		dxfer_len = 0;
 
 	if (dxfer_len) {
-		ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
+		ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
+				      GFP_KERNEL);
 		if (ret)
 			goto out;
 	}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1e2aff8..6a062ee 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -39,6 +39,7 @@
 #define CFQ_MIN_TT		(2)
 
 #define CFQ_SLICE_SCALE		(5)
+#define CFQ_HW_QUEUE_MIN	(5)
 
 #define RQ_CIC(rq)		\
 	((struct cfq_io_context *) (rq)->elevator_private)
@@ -86,7 +87,14 @@
 
 	int rq_in_driver;
 	int sync_flight;
+
+	/*
+	 * queue-depth detection
+	 */
+	int rq_queued;
 	int hw_tag;
+	int hw_tag_samples;
+	int rq_in_driver_peak;
 
 	/*
 	 * idle window management
@@ -244,7 +252,7 @@
 {
 	if (cfqd->busy_queues) {
 		cfq_log(cfqd, "schedule dispatch");
-		kblockd_schedule_work(&cfqd->unplug_work);
+		kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
 	}
 }
 
@@ -654,15 +662,6 @@
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
 						cfqd->rq_in_driver);
 
-	/*
-	 * If the depth is larger 1, it really could be queueing. But lets
-	 * make the mark a little higher - idling could still be good for
-	 * low queueing, and a low queueing number could also just indicate
-	 * a SCSI mid layer like behaviour where limit+1 is often seen.
-	 */
-	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
-		cfqd->hw_tag = 1;
-
 	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
 }
 
@@ -686,6 +685,7 @@
 	list_del_init(&rq->queuelist);
 	cfq_del_rq_rb(rq);
 
+	cfqq->cfqd->rq_queued--;
 	if (rq_is_meta(rq)) {
 		WARN_ON(!cfqq->meta_pending);
 		cfqq->meta_pending--;
@@ -878,6 +878,14 @@
 	struct cfq_io_context *cic;
 	unsigned long sl;
 
+	/*
+	 * SSD device without seek penalty, disable idling. But only do so
+	 * for devices that support queuing, otherwise we still have a problem
+	 * with sync vs async workloads.
+	 */
+	if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
+		return;
+
 	WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
 	WARN_ON(cfq_cfqq_slice_new(cfqq));
 
@@ -1833,6 +1841,7 @@
 {
 	struct cfq_io_context *cic = RQ_CIC(rq);
 
+	cfqd->rq_queued++;
 	if (rq_is_meta(rq))
 		cfqq->meta_pending++;
 
@@ -1880,6 +1889,31 @@
 	cfq_rq_enqueued(cfqd, cfqq, rq);
 }
 
+/*
+ * Update hw_tag based on peak queue depth over 50 samples under
+ * sufficient load.
+ */
+static void cfq_update_hw_tag(struct cfq_data *cfqd)
+{
+	if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
+		cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
+
+	if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
+	    cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
+		return;
+
+	if (cfqd->hw_tag_samples++ < 50)
+		return;
+
+	if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN)
+		cfqd->hw_tag = 1;
+	else
+		cfqd->hw_tag = 0;
+
+	cfqd->hw_tag_samples = 0;
+	cfqd->rq_in_driver_peak = 0;
+}
+
 static void cfq_completed_request(struct request_queue *q, struct request *rq)
 {
 	struct cfq_queue *cfqq = RQ_CFQQ(rq);
@@ -1890,6 +1924,8 @@
 	now = jiffies;
 	cfq_log_cfqq(cfqd, cfqq, "complete");
 
+	cfq_update_hw_tag(cfqd);
+
 	WARN_ON(!cfqd->rq_in_driver);
 	WARN_ON(!cfqq->dispatched);
 	cfqd->rq_in_driver--;
@@ -2200,6 +2236,7 @@
 	cfqd->cfq_slice[1] = cfq_slice_sync;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->hw_tag = 1;
 
 	return cfqd;
 }
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
index 79c1499..e669aed 100644
--- a/block/cmd-filter.c
+++ b/block/cmd-filter.c
@@ -211,14 +211,10 @@
 {
 	int ret;
 	struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-	struct kobject *parent = kobject_get(disk->holder_dir->parent);
 
-	if (!parent)
-		return -ENODEV;
-
-	ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
+	ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
+				   &disk_to_dev(disk)->kobj,
 				   "%s", "cmd_filter");
-
 	if (ret < 0)
 		return ret;
 
@@ -231,7 +227,6 @@
 	struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
 
 	kobject_put(&filter->kobj);
-	kobject_put(disk->holder_dir->parent);
 }
 EXPORT_SYMBOL(blk_unregister_filter);
 #endif
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index c23177e..1e559fb 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -788,6 +788,7 @@
 		return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
 	case BLKFLSBUF:
 	case BLKROSET:
+	case BLKDISCARD:
 	/*
 	 * the ones below are implemented in blkdev_locked_ioctl,
 	 * but we call blkdev_ioctl, which gets the lock for us
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 342448c..fd31117 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -33,7 +33,7 @@
 	 */
 	struct rb_root sort_list[2];	
 	struct list_head fifo_list[2];
-	
+
 	/*
 	 * next in sort order. read, write or both are NULL
 	 */
@@ -53,7 +53,11 @@
 
 static void deadline_move_request(struct deadline_data *, struct request *);
 
-#define RQ_RB_ROOT(dd, rq)	(&(dd)->sort_list[rq_data_dir((rq))])
+static inline struct rb_root *
+deadline_rb_root(struct deadline_data *dd, struct request *rq)
+{
+	return &dd->sort_list[rq_data_dir(rq)];
+}
 
 /*
  * get the request after `rq' in sector-sorted order
@@ -72,15 +76,11 @@
 static void
 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 {
-	struct rb_root *root = RQ_RB_ROOT(dd, rq);
+	struct rb_root *root = deadline_rb_root(dd, rq);
 	struct request *__alias;
 
-retry:
-	__alias = elv_rb_add(root, rq);
-	if (unlikely(__alias)) {
+	while (unlikely(__alias = elv_rb_add(root, rq)))
 		deadline_move_request(dd, __alias);
-		goto retry;
-	}
 }
 
 static inline void
@@ -91,7 +91,7 @@
 	if (dd->next_rq[data_dir] == rq)
 		dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-	elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
+	elv_rb_del(deadline_rb_root(dd, rq), rq);
 }
 
 /*
@@ -106,7 +106,7 @@
 	deadline_add_rq_rb(dd, rq);
 
 	/*
-	 * set expire time (only used for reads) and add to fifo list
+	 * set expire time and add to fifo list
 	 */
 	rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
 	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
@@ -162,7 +162,7 @@
 	 * if the merge was a front merge, we need to reposition request
 	 */
 	if (type == ELEVATOR_FRONT_MERGE) {
-		elv_rb_del(RQ_RB_ROOT(dd, req), req);
+		elv_rb_del(deadline_rb_root(dd, req), req);
 		deadline_add_rq_rb(dd, req);
 	}
 }
@@ -212,7 +212,7 @@
 	dd->next_rq[WRITE] = NULL;
 	dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-	dd->last_sector = rq->sector + rq->nr_sectors;
+	dd->last_sector = rq_end_sector(rq);
 
 	/*
 	 * take it off the sort and fifo list, move
@@ -222,7 +222,7 @@
 }
 
 /*
- * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
+ * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
  */
 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
@@ -258,17 +258,9 @@
 	else
 		rq = dd->next_rq[READ];
 
-	if (rq) {
-		/* we have a "next request" */
-		
-		if (dd->last_sector != rq->sector)
-			/* end the batch on a non sequential request */
-			dd->batching += dd->fifo_batch;
-		
-		if (dd->batching < dd->fifo_batch)
-			/* we are still entitled to batch */
-			goto dispatch_request;
-	}
+	if (rq && dd->batching < dd->fifo_batch)
+		/* we have a next request are still entitled to batch */
+		goto dispatch_request;
 
 	/*
 	 * at this point we are not running a batch. select the appropriate
diff --git a/block/elevator.c b/block/elevator.c
index ed6f8f3..0451892 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -34,8 +34,9 @@
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
 #include <linux/hash.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
+#include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
@@ -75,6 +76,12 @@
 		return 0;
 
 	/*
+	 * Don't merge file system requests and discard requests
+	 */
+	if (bio_discard(bio) != bio_discard(rq->bio))
+		return 0;
+
+	/*
 	 * different data direction or already started, don't merge
 	 */
 	if (bio_data_dir(bio) != rq_data_dir(rq))
@@ -438,6 +445,8 @@
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
+		if (blk_discard_rq(rq) != blk_discard_rq(pos))
+			break;
 		if (rq_data_dir(rq) != rq_data_dir(pos))
 			break;
 		if (pos->cmd_flags & stop_flags)
@@ -607,7 +616,7 @@
 		break;
 
 	case ELEVATOR_INSERT_SORT:
-		BUG_ON(!blk_fs_request(rq));
+		BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
 		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
 		if (rq_mergeable(rq)) {
@@ -692,7 +701,7 @@
 		 * this request is scheduling boundary, update
 		 * end_sector
 		 */
-		if (blk_fs_request(rq)) {
+		if (blk_fs_request(rq) || blk_discard_rq(rq)) {
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
@@ -745,7 +754,7 @@
 		 * not ever see it.
 		 */
 		if (blk_empty_barrier(rq)) {
-			end_queued_request(rq, 1);
+			__blk_end_request(rq, 0, blk_rq_bytes(rq));
 			continue;
 		}
 		if (!(rq->cmd_flags & REQ_STARTED)) {
@@ -764,6 +773,12 @@
 			 */
 			rq->cmd_flags |= REQ_STARTED;
 			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+
+			/*
+			 * We are now handing the request to the hardware,
+			 * add the timeout handler
+			 */
+			blk_add_timer(rq);
 		}
 
 		if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -782,7 +797,6 @@
 			 * device can handle
 			 */
 			rq->nr_phys_segments++;
-			rq->nr_hw_segments++;
 		}
 
 		if (!q->prep_rq_fn)
@@ -805,14 +819,13 @@
 				 * so that we don't add it again
 				 */
 				--rq->nr_phys_segments;
-				--rq->nr_hw_segments;
 			}
 
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL) {
 			rq->cmd_flags |= REQ_QUIET;
-			end_queued_request(rq, 0);
+			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
@@ -901,6 +914,19 @@
 	return ELV_MQUEUE_MAY;
 }
 
+void elv_abort_queue(struct request_queue *q)
+{
+	struct request *rq;
+
+	while (!list_empty(&q->queue_head)) {
+		rq = list_entry_rq(q->queue_head.next);
+		rq->cmd_flags |= REQ_QUIET;
+		blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+		__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+	}
+}
+EXPORT_SYMBOL(elv_abort_queue);
+
 void elv_completed_request(struct request_queue *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
diff --git a/block/genhd.c b/block/genhd.c
index e0ce23ac..4cd3433 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -16,6 +16,7 @@
 #include <linux/kobj_map.h>
 #include <linux/buffer_head.h>
 #include <linux/mutex.h>
+#include <linux/idr.h>
 
 #include "blk.h"
 
@@ -24,8 +25,194 @@
 struct kobject *block_depr;
 #endif
 
+/* for extended dynamic devt allocation, currently only one major is used */
+#define MAX_EXT_DEVT		(1 << MINORBITS)
+
+/* For extended devt allocation.  ext_devt_mutex prevents look up
+ * results from going away underneath its user.
+ */
+static DEFINE_MUTEX(ext_devt_mutex);
+static DEFINE_IDR(ext_devt_idr);
+
 static struct device_type disk_type;
 
+/**
+ * disk_get_part - get partition
+ * @disk: disk to look partition from
+ * @partno: partition number
+ *
+ * Look for partition @partno from @disk.  If found, increment
+ * reference count and return it.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * Pointer to the found partition on success, NULL if not found.
+ */
+struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
+{
+	struct hd_struct *part = NULL;
+	struct disk_part_tbl *ptbl;
+
+	if (unlikely(partno < 0))
+		return NULL;
+
+	rcu_read_lock();
+
+	ptbl = rcu_dereference(disk->part_tbl);
+	if (likely(partno < ptbl->len)) {
+		part = rcu_dereference(ptbl->part[partno]);
+		if (part)
+			get_device(part_to_dev(part));
+	}
+
+	rcu_read_unlock();
+
+	return part;
+}
+EXPORT_SYMBOL_GPL(disk_get_part);
+
+/**
+ * disk_part_iter_init - initialize partition iterator
+ * @piter: iterator to initialize
+ * @disk: disk to iterate over
+ * @flags: DISK_PITER_* flags
+ *
+ * Initialize @piter so that it iterates over partitions of @disk.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
+			  unsigned int flags)
+{
+	struct disk_part_tbl *ptbl;
+
+	rcu_read_lock();
+	ptbl = rcu_dereference(disk->part_tbl);
+
+	piter->disk = disk;
+	piter->part = NULL;
+
+	if (flags & DISK_PITER_REVERSE)
+		piter->idx = ptbl->len - 1;
+	else if (flags & DISK_PITER_INCL_PART0)
+		piter->idx = 0;
+	else
+		piter->idx = 1;
+
+	piter->flags = flags;
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_init);
+
+/**
+ * disk_part_iter_next - proceed iterator to the next partition and return it
+ * @piter: iterator of interest
+ *
+ * Proceed @piter to the next partition and return it.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
+{
+	struct disk_part_tbl *ptbl;
+	int inc, end;
+
+	/* put the last partition */
+	disk_put_part(piter->part);
+	piter->part = NULL;
+
+	/* get part_tbl */
+	rcu_read_lock();
+	ptbl = rcu_dereference(piter->disk->part_tbl);
+
+	/* determine iteration parameters */
+	if (piter->flags & DISK_PITER_REVERSE) {
+		inc = -1;
+		if (piter->flags & DISK_PITER_INCL_PART0)
+			end = -1;
+		else
+			end = 0;
+	} else {
+		inc = 1;
+		end = ptbl->len;
+	}
+
+	/* iterate to the next partition */
+	for (; piter->idx != end; piter->idx += inc) {
+		struct hd_struct *part;
+
+		part = rcu_dereference(ptbl->part[piter->idx]);
+		if (!part)
+			continue;
+		if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
+			continue;
+
+		get_device(part_to_dev(part));
+		piter->part = part;
+		piter->idx += inc;
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return piter->part;
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_next);
+
+/**
+ * disk_part_iter_exit - finish up partition iteration
+ * @piter: iter of interest
+ *
+ * Called when iteration is over.  Cleans up @piter.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void disk_part_iter_exit(struct disk_part_iter *piter)
+{
+	disk_put_part(piter->part);
+	piter->part = NULL;
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_exit);
+
+/**
+ * disk_map_sector_rcu - map sector to partition
+ * @disk: gendisk of interest
+ * @sector: sector to map
+ *
+ * Find out which partition @sector maps to on @disk.  This is
+ * primarily used for stats accounting.
+ *
+ * CONTEXT:
+ * RCU read locked.  The returned partition pointer is valid only
+ * while preemption is disabled.
+ *
+ * RETURNS:
+ * Found partition on success, part0 is returned if no partition matches
+ */
+struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
+{
+	struct disk_part_tbl *ptbl;
+	int i;
+
+	ptbl = rcu_dereference(disk->part_tbl);
+
+	for (i = 1; i < ptbl->len; i++) {
+		struct hd_struct *part = rcu_dereference(ptbl->part[i]);
+
+		if (part && part->start_sect <= sector &&
+		    sector < part->start_sect + part->nr_sects)
+			return part;
+	}
+	return &disk->part0;
+}
+EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
+
 /*
  * Can be deleted altogether. Later.
  *
@@ -43,14 +230,14 @@
 }
 
 #ifdef CONFIG_PROC_FS
-void blkdev_show(struct seq_file *f, off_t offset)
+void blkdev_show(struct seq_file *seqf, off_t offset)
 {
 	struct blk_major_name *dp;
 
 	if (offset < BLKDEV_MAJOR_HASH_SIZE) {
 		mutex_lock(&block_class_lock);
 		for (dp = major_names[offset]; dp; dp = dp->next)
-			seq_printf(f, "%3d %s\n", dp->major, dp->name);
+			seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
 		mutex_unlock(&block_class_lock);
 	}
 }
@@ -136,6 +323,118 @@
 
 static struct kobj_map *bdev_map;
 
+/**
+ * blk_mangle_minor - scatter minor numbers apart
+ * @minor: minor number to mangle
+ *
+ * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
+ * is enabled.  Mangling twice gives the original value.
+ *
+ * RETURNS:
+ * Mangled value.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+static int blk_mangle_minor(int minor)
+{
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+	int i;
+
+	for (i = 0; i < MINORBITS / 2; i++) {
+		int low = minor & (1 << i);
+		int high = minor & (1 << (MINORBITS - 1 - i));
+		int distance = MINORBITS - 1 - 2 * i;
+
+		minor ^= low | high;	/* clear both bits */
+		low <<= distance;	/* swap the positions */
+		high >>= distance;
+		minor |= low | high;	/* and set */
+	}
+#endif
+	return minor;
+}
+
+/**
+ * blk_alloc_devt - allocate a dev_t for a partition
+ * @part: partition to allocate dev_t for
+ * @gfp_mask: memory allocation flag
+ * @devt: out parameter for resulting dev_t
+ *
+ * Allocate a dev_t for block device.
+ *
+ * RETURNS:
+ * 0 on success, allocated dev_t is returned in *@devt.  -errno on
+ * failure.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
+{
+	struct gendisk *disk = part_to_disk(part);
+	int idx, rc;
+
+	/* in consecutive minor range? */
+	if (part->partno < disk->minors) {
+		*devt = MKDEV(disk->major, disk->first_minor + part->partno);
+		return 0;
+	}
+
+	/* allocate ext devt */
+	do {
+		if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
+			return -ENOMEM;
+		rc = idr_get_new(&ext_devt_idr, part, &idx);
+	} while (rc == -EAGAIN);
+
+	if (rc)
+		return rc;
+
+	if (idx > MAX_EXT_DEVT) {
+		idr_remove(&ext_devt_idr, idx);
+		return -EBUSY;
+	}
+
+	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
+	return 0;
+}
+
+/**
+ * blk_free_devt - free a dev_t
+ * @devt: dev_t to free
+ *
+ * Free @devt which was allocated using blk_alloc_devt().
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void blk_free_devt(dev_t devt)
+{
+	might_sleep();
+
+	if (devt == MKDEV(0, 0))
+		return;
+
+	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
+		mutex_lock(&ext_devt_mutex);
+		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
+		mutex_unlock(&ext_devt_mutex);
+	}
+}
+
+static char *bdevt_str(dev_t devt, char *buf)
+{
+	if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
+		char tbuf[BDEVT_SIZE];
+		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
+		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
+	} else
+		snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
+
+	return buf;
+}
+
 /*
  * Register device numbers dev..(dev+range-1)
  * range must be nonzero
@@ -157,11 +456,11 @@
 
 EXPORT_SYMBOL(blk_unregister_region);
 
-static struct kobject *exact_match(dev_t devt, int *part, void *data)
+static struct kobject *exact_match(dev_t devt, int *partno, void *data)
 {
 	struct gendisk *p = data;
 
-	return &p->dev.kobj;
+	return &disk_to_dev(p)->kobj;
 }
 
 static int exact_lock(dev_t devt, void *data)
@@ -179,21 +478,46 @@
  *
  * This function registers the partitioning information in @disk
  * with the kernel.
+ *
+ * FIXME: error handling
  */
 void add_disk(struct gendisk *disk)
 {
 	struct backing_dev_info *bdi;
+	dev_t devt;
 	int retval;
 
+	/* minors == 0 indicates to use ext devt from part0 and should
+	 * be accompanied with EXT_DEVT flag.  Make sure all
+	 * parameters make sense.
+	 */
+	WARN_ON(disk->minors && !(disk->major || disk->first_minor));
+	WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
+
 	disk->flags |= GENHD_FL_UP;
-	blk_register_region(MKDEV(disk->major, disk->first_minor),
-			    disk->minors, NULL, exact_match, exact_lock, disk);
+
+	retval = blk_alloc_devt(&disk->part0, &devt);
+	if (retval) {
+		WARN_ON(1);
+		return;
+	}
+	disk_to_dev(disk)->devt = devt;
+
+	/* ->major and ->first_minor aren't supposed to be
+	 * dereferenced from here on, but set them just in case.
+	 */
+	disk->major = MAJOR(devt);
+	disk->first_minor = MINOR(devt);
+
+	blk_register_region(disk_devt(disk), disk->minors, NULL,
+			    exact_match, exact_lock, disk);
 	register_disk(disk);
 	blk_register_queue(disk);
 
 	bdi = &disk->queue->backing_dev_info;
-	bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
-	retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi");
+	bdi_register_dev(bdi, disk_devt(disk));
+	retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
+				   "bdi");
 	WARN_ON(retval);
 }
 
@@ -202,79 +526,72 @@
 
 void unlink_gendisk(struct gendisk *disk)
 {
-	sysfs_remove_link(&disk->dev.kobj, "bdi");
+	sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
 	bdi_unregister(&disk->queue->backing_dev_info);
 	blk_unregister_queue(disk);
-	blk_unregister_region(MKDEV(disk->major, disk->first_minor),
-			      disk->minors);
+	blk_unregister_region(disk_devt(disk), disk->minors);
 }
 
 /**
  * get_gendisk - get partitioning information for a given device
- * @dev: device to get partitioning information for
+ * @devt: device to get partitioning information for
+ * @part: returned partition index
  *
  * This function gets the structure containing partitioning
- * information for the given device @dev.
+ * information for the given device @devt.
  */
-struct gendisk *get_gendisk(dev_t devt, int *part)
+struct gendisk *get_gendisk(dev_t devt, int *partno)
 {
-	struct kobject *kobj = kobj_lookup(bdev_map, devt, part);
-	struct device *dev = kobj_to_dev(kobj);
+	struct gendisk *disk = NULL;
 
-	return  kobj ? dev_to_disk(dev) : NULL;
-}
+	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
+		struct kobject *kobj;
 
-/*
- * print a partitions - intended for places where the root filesystem can't be
- * mounted and thus to give the victim some idea of what went wrong
- */
-static int printk_partition(struct device *dev, void *data)
-{
-	struct gendisk *sgp;
-	char buf[BDEVNAME_SIZE];
-	int n;
+		kobj = kobj_lookup(bdev_map, devt, partno);
+		if (kobj)
+			disk = dev_to_disk(kobj_to_dev(kobj));
+	} else {
+		struct hd_struct *part;
 
-	if (dev->type != &disk_type)
-		goto exit;
-
-	sgp = dev_to_disk(dev);
-	/*
-	 * Don't show empty devices or things that have been surpressed
-	 */
-	if (get_capacity(sgp) == 0 ||
-	    (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
-		goto exit;
-
-	/*
-	 * Note, unlike /proc/partitions, I am showing the numbers in
-	 * hex - the same format as the root= option takes.
-	 */
-	printk("%02x%02x %10llu %s",
-		sgp->major, sgp->first_minor,
-		(unsigned long long)get_capacity(sgp) >> 1,
-		disk_name(sgp, 0, buf));
-	if (sgp->driverfs_dev != NULL &&
-	    sgp->driverfs_dev->driver != NULL)
-		printk(" driver: %s\n",
-			sgp->driverfs_dev->driver->name);
-	else
-		printk(" (driver?)\n");
-
-	/* now show the partitions */
-	for (n = 0; n < sgp->minors - 1; ++n) {
-		if (sgp->part[n] == NULL)
-			goto exit;
-		if (sgp->part[n]->nr_sects == 0)
-			goto exit;
-		printk("  %02x%02x %10llu %s\n",
-			sgp->major, n + 1 + sgp->first_minor,
-			(unsigned long long)sgp->part[n]->nr_sects >> 1,
-			disk_name(sgp, n + 1, buf));
+		mutex_lock(&ext_devt_mutex);
+		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
+		if (part && get_disk(part_to_disk(part))) {
+			*partno = part->partno;
+			disk = part_to_disk(part);
+		}
+		mutex_unlock(&ext_devt_mutex);
 	}
-exit:
-	return 0;
+
+	return disk;
 }
 
+/**
+ * bdget_disk - do bdget() by gendisk and partition number
+ * @disk: gendisk of interest
+ * @partno: partition number
+ *
+ * Find partition @partno from @disk, do bdget() on it.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * Resulting block_device on success, NULL on failure.
+ */
+struct block_device *bdget_disk(struct gendisk *disk, int partno)
+{
+	struct hd_struct *part;
+	struct block_device *bdev = NULL;
+
+	part = disk_get_part(disk, partno);
+	if (part)
+		bdev = bdget(part_devt(part));
+	disk_put_part(part);
+
+	return bdev;
+}
+EXPORT_SYMBOL(bdget_disk);
+
 /*
  * print a full list of all partitions - intended for places where the root
  * filesystem can't be mounted and thus to give the victim some idea of what
@@ -282,120 +599,145 @@
  */
 void __init printk_all_partitions(void)
 {
-	mutex_lock(&block_class_lock);
-	class_for_each_device(&block_class, NULL, NULL, printk_partition);
-	mutex_unlock(&block_class_lock);
+	struct class_dev_iter iter;
+	struct device *dev;
+
+	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+	while ((dev = class_dev_iter_next(&iter))) {
+		struct gendisk *disk = dev_to_disk(dev);
+		struct disk_part_iter piter;
+		struct hd_struct *part;
+		char name_buf[BDEVNAME_SIZE];
+		char devt_buf[BDEVT_SIZE];
+
+		/*
+		 * Don't show empty devices or things that have been
+		 * surpressed
+		 */
+		if (get_capacity(disk) == 0 ||
+		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
+			continue;
+
+		/*
+		 * Note, unlike /proc/partitions, I am showing the
+		 * numbers in hex - the same format as the root=
+		 * option takes.
+		 */
+		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+		while ((part = disk_part_iter_next(&piter))) {
+			bool is_part0 = part == &disk->part0;
+
+			printk("%s%s %10llu %s", is_part0 ? "" : "  ",
+			       bdevt_str(part_devt(part), devt_buf),
+			       (unsigned long long)part->nr_sects >> 1,
+			       disk_name(disk, part->partno, name_buf));
+			if (is_part0) {
+				if (disk->driverfs_dev != NULL &&
+				    disk->driverfs_dev->driver != NULL)
+					printk(" driver: %s\n",
+					      disk->driverfs_dev->driver->name);
+				else
+					printk(" (driver?)\n");
+			} else
+				printk("\n");
+		}
+		disk_part_iter_exit(&piter);
+	}
+	class_dev_iter_exit(&iter);
 }
 
 #ifdef CONFIG_PROC_FS
 /* iterator */
-static int find_start(struct device *dev, void *data)
+static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
 {
-	loff_t *k = data;
+	loff_t skip = *pos;
+	struct class_dev_iter *iter;
+	struct device *dev;
 
-	if (dev->type != &disk_type)
-		return 0;
-	if (!*k)
-		return 1;
-	(*k)--;
-	return 0;
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return ERR_PTR(-ENOMEM);
+
+	seqf->private = iter;
+	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
+	do {
+		dev = class_dev_iter_next(iter);
+		if (!dev)
+			return NULL;
+	} while (skip--);
+
+	return dev_to_disk(dev);
 }
 
-static void *part_start(struct seq_file *part, loff_t *pos)
+static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
 {
 	struct device *dev;
-	loff_t k = *pos;
 
-	if (!k)
-		part->private = (void *)1LU;	/* tell show to print header */
-
-	mutex_lock(&block_class_lock);
-	dev = class_find_device(&block_class, NULL, &k, find_start);
-	if (dev) {
-		put_device(dev);
+	(*pos)++;
+	dev = class_dev_iter_next(seqf->private);
+	if (dev)
 		return dev_to_disk(dev);
-	}
+
 	return NULL;
 }
 
-static int find_next(struct device *dev, void *data)
+static void disk_seqf_stop(struct seq_file *seqf, void *v)
 {
-	if (dev->type == &disk_type)
-		return 1;
-	return 0;
-}
+	struct class_dev_iter *iter = seqf->private;
 
-static void *part_next(struct seq_file *part, void *v, loff_t *pos)
-{
-	struct gendisk *gp = v;
-	struct device *dev;
-	++*pos;
-	dev = class_find_device(&block_class, &gp->dev, NULL, find_next);
-	if (dev) {
-		put_device(dev);
-		return dev_to_disk(dev);
+	/* stop is called even after start failed :-( */
+	if (iter) {
+		class_dev_iter_exit(iter);
+		kfree(iter);
 	}
-	return NULL;
 }
 
-static void part_stop(struct seq_file *part, void *v)
+static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
 {
-	mutex_unlock(&block_class_lock);
+	static void *p;
+
+	p = disk_seqf_start(seqf, pos);
+	if (!IS_ERR(p) && p && !*pos)
+		seq_puts(seqf, "major minor  #blocks  name\n\n");
+	return p;
 }
 
-static int show_partition(struct seq_file *part, void *v)
+static int show_partition(struct seq_file *seqf, void *v)
 {
 	struct gendisk *sgp = v;
-	int n;
+	struct disk_part_iter piter;
+	struct hd_struct *part;
 	char buf[BDEVNAME_SIZE];
 
-	/*
-	 * Print header if start told us to do.  This is to preserve
-	 * the original behavior of not printing header if no
-	 * partition exists.  This hackery will be removed later with
-	 * class iteration clean up.
-	 */
-	if (part->private) {
-		seq_puts(part, "major minor  #blocks  name\n\n");
-		part->private = NULL;
-	}
-
 	/* Don't show non-partitionable removeable devices or empty devices */
-	if (!get_capacity(sgp) ||
-			(sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE)))
+	if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+				   (sgp->flags & GENHD_FL_REMOVABLE)))
 		return 0;
 	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
 		return 0;
 
 	/* show the full disk and all non-0 size partitions of it */
-	seq_printf(part, "%4d  %4d %10llu %s\n",
-		sgp->major, sgp->first_minor,
-		(unsigned long long)get_capacity(sgp) >> 1,
-		disk_name(sgp, 0, buf));
-	for (n = 0; n < sgp->minors - 1; n++) {
-		if (!sgp->part[n])
-			continue;
-		if (sgp->part[n]->nr_sects == 0)
-			continue;
-		seq_printf(part, "%4d  %4d %10llu %s\n",
-			sgp->major, n + 1 + sgp->first_minor,
-			(unsigned long long)sgp->part[n]->nr_sects >> 1 ,
-			disk_name(sgp, n + 1, buf));
-	}
+	disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
+	while ((part = disk_part_iter_next(&piter)))
+		seq_printf(seqf, "%4d  %7d %10llu %s\n",
+			   MAJOR(part_devt(part)), MINOR(part_devt(part)),
+			   (unsigned long long)part->nr_sects >> 1,
+			   disk_name(sgp, part->partno, buf));
+	disk_part_iter_exit(&piter);
 
 	return 0;
 }
 
 const struct seq_operations partitions_op = {
-	.start	= part_start,
-	.next	= part_next,
-	.stop	= part_stop,
+	.start	= show_partition_start,
+	.next	= disk_seqf_next,
+	.stop	= disk_seqf_stop,
 	.show	= show_partition
 };
 #endif
 
 
-static struct kobject *base_probe(dev_t devt, int *part, void *data)
+static struct kobject *base_probe(dev_t devt, int *partno, void *data)
 {
 	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
 		/* Make old-style 2.4 aliases work */
@@ -431,6 +773,14 @@
 	return sprintf(buf, "%d\n", disk->minors);
 }
 
+static ssize_t disk_ext_range_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", disk_max_parts(disk));
+}
+
 static ssize_t disk_removable_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
@@ -445,15 +795,7 @@
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
-	return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
-}
-
-static ssize_t disk_size_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-
-	return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk));
+	return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 }
 
 static ssize_t disk_capability_show(struct device *dev,
@@ -464,73 +806,26 @@
 	return sprintf(buf, "%x\n", disk->flags);
 }
 
-static ssize_t disk_stat_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-
-	preempt_disable();
-	disk_round_stats(disk);
-	preempt_enable();
-	return sprintf(buf,
-		"%8lu %8lu %8llu %8u "
-		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
-		"\n",
-		disk_stat_read(disk, ios[READ]),
-		disk_stat_read(disk, merges[READ]),
-		(unsigned long long)disk_stat_read(disk, sectors[READ]),
-		jiffies_to_msecs(disk_stat_read(disk, ticks[READ])),
-		disk_stat_read(disk, ios[WRITE]),
-		disk_stat_read(disk, merges[WRITE]),
-		(unsigned long long)disk_stat_read(disk, sectors[WRITE]),
-		jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])),
-		disk->in_flight,
-		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
-		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
-}
-
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-static ssize_t disk_fail_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-
-	return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0);
-}
-
-static ssize_t disk_fail_store(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t count)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-	int i;
-
-	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
-		if (i == 0)
-			disk->flags &= ~GENHD_FL_FAIL;
-		else
-			disk->flags |= GENHD_FL_FAIL;
-	}
-
-	return count;
-}
-
-#endif
-
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
+static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
+static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
-static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
+static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
-	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store);
+	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+#endif
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+static struct device_attribute dev_attr_fail_timeout =
+	__ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
+		part_timeout_store);
 #endif
 
 static struct attribute *disk_attrs[] = {
 	&dev_attr_range.attr,
+	&dev_attr_ext_range.attr,
 	&dev_attr_removable.attr,
 	&dev_attr_ro.attr,
 	&dev_attr_size.attr,
@@ -539,6 +834,9 @@
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+	&dev_attr_fail_timeout.attr,
+#endif
 	NULL
 };
 
@@ -551,13 +849,87 @@
 	NULL
 };
 
+static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
+{
+	struct disk_part_tbl *ptbl =
+		container_of(head, struct disk_part_tbl, rcu_head);
+
+	kfree(ptbl);
+}
+
+/**
+ * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
+ * @disk: disk to replace part_tbl for
+ * @new_ptbl: new part_tbl to install
+ *
+ * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
+ * original ptbl is freed using RCU callback.
+ *
+ * LOCKING:
+ * Matching bd_mutx locked.
+ */
+static void disk_replace_part_tbl(struct gendisk *disk,
+				  struct disk_part_tbl *new_ptbl)
+{
+	struct disk_part_tbl *old_ptbl = disk->part_tbl;
+
+	rcu_assign_pointer(disk->part_tbl, new_ptbl);
+	if (old_ptbl)
+		call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+}
+
+/**
+ * disk_expand_part_tbl - expand disk->part_tbl
+ * @disk: disk to expand part_tbl for
+ * @partno: expand such that this partno can fit in
+ *
+ * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
+ * uses RCU to allow unlocked dereferencing for stats and other stuff.
+ *
+ * LOCKING:
+ * Matching bd_mutex locked, might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int disk_expand_part_tbl(struct gendisk *disk, int partno)
+{
+	struct disk_part_tbl *old_ptbl = disk->part_tbl;
+	struct disk_part_tbl *new_ptbl;
+	int len = old_ptbl ? old_ptbl->len : 0;
+	int target = partno + 1;
+	size_t size;
+	int i;
+
+	/* disk_max_parts() is zero during initialization, ignore if so */
+	if (disk_max_parts(disk) && target > disk_max_parts(disk))
+		return -EINVAL;
+
+	if (target <= len)
+		return 0;
+
+	size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
+	new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
+	if (!new_ptbl)
+		return -ENOMEM;
+
+	INIT_RCU_HEAD(&new_ptbl->rcu_head);
+	new_ptbl->len = target;
+
+	for (i = 0; i < len; i++)
+		rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
+
+	disk_replace_part_tbl(disk, new_ptbl);
+	return 0;
+}
+
 static void disk_release(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
 	kfree(disk->random);
-	kfree(disk->part);
-	free_disk_stats(disk);
+	disk_replace_part_tbl(disk, NULL);
+	free_part_stats(&disk->part0);
 	kfree(disk);
 }
 struct class block_class = {
@@ -578,83 +950,31 @@
  * The output looks suspiciously like /proc/partitions with a bunch of
  * extra fields.
  */
-
-static void *diskstats_start(struct seq_file *part, loff_t *pos)
-{
-	struct device *dev;
-	loff_t k = *pos;
-
-	mutex_lock(&block_class_lock);
-	dev = class_find_device(&block_class, NULL, &k, find_start);
-	if (dev) {
-		put_device(dev);
-		return dev_to_disk(dev);
-	}
-	return NULL;
-}
-
-static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos)
+static int diskstats_show(struct seq_file *seqf, void *v)
 {
 	struct gendisk *gp = v;
-	struct device *dev;
-
-	++*pos;
-	dev = class_find_device(&block_class, &gp->dev, NULL, find_next);
-	if (dev) {
-		put_device(dev);
-		return dev_to_disk(dev);
-	}
-	return NULL;
-}
-
-static void diskstats_stop(struct seq_file *part, void *v)
-{
-	mutex_unlock(&block_class_lock);
-}
-
-static int diskstats_show(struct seq_file *s, void *v)
-{
-	struct gendisk *gp = v;
+	struct disk_part_iter piter;
+	struct hd_struct *hd;
 	char buf[BDEVNAME_SIZE];
-	int n = 0;
+	int cpu;
 
 	/*
-	if (&gp->dev.kobj.entry == block_class.devices.next)
-		seq_puts(s,	"major minor name"
+	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
+		seq_puts(seqf,	"major minor name"
 				"     rio rmerge rsect ruse wio wmerge "
 				"wsect wuse running use aveq"
 				"\n\n");
 	*/
  
-	preempt_disable();
-	disk_round_stats(gp);
-	preempt_enable();
-	seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
-		gp->major, n + gp->first_minor, disk_name(gp, n, buf),
-		disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]),
-		(unsigned long long)disk_stat_read(gp, sectors[0]),
-		jiffies_to_msecs(disk_stat_read(gp, ticks[0])),
-		disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]),
-		(unsigned long long)disk_stat_read(gp, sectors[1]),
-		jiffies_to_msecs(disk_stat_read(gp, ticks[1])),
-		gp->in_flight,
-		jiffies_to_msecs(disk_stat_read(gp, io_ticks)),
-		jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
-
-	/* now show all non-0 size partitions of it */
-	for (n = 0; n < gp->minors - 1; n++) {
-		struct hd_struct *hd = gp->part[n];
-
-		if (!hd || !hd->nr_sects)
-			continue;
-
-		preempt_disable();
-		part_round_stats(hd);
-		preempt_enable();
-		seq_printf(s, "%4d %4d %s %lu %lu %llu "
+	disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0);
+	while ((hd = disk_part_iter_next(&piter))) {
+		cpu = part_stat_lock();
+		part_round_stats(cpu, hd);
+		part_stat_unlock();
+		seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
 			   "%u %lu %lu %llu %u %u %u %u\n",
-			   gp->major, n + gp->first_minor + 1,
-			   disk_name(gp, n + 1, buf),
+			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
+			   disk_name(gp, hd->partno, buf),
 			   part_stat_read(hd, ios[0]),
 			   part_stat_read(hd, merges[0]),
 			   (unsigned long long)part_stat_read(hd, sectors[0]),
@@ -668,14 +988,15 @@
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
 			);
 	}
+	disk_part_iter_exit(&piter);
  
 	return 0;
 }
 
 const struct seq_operations diskstats_op = {
-	.start	= diskstats_start,
-	.next	= diskstats_next,
-	.stop	= diskstats_stop,
+	.start	= disk_seqf_start,
+	.next	= disk_seqf_next,
+	.stop	= disk_seqf_stop,
 	.show	= diskstats_show
 };
 #endif /* CONFIG_PROC_FS */
@@ -690,7 +1011,7 @@
 	 * set enviroment vars to indicate which event this is for
 	 * so that user space will know to go check the media status.
 	 */
-	kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp);
+	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
 	put_device(gd->driverfs_dev);
 }
 
@@ -703,42 +1024,29 @@
 EXPORT_SYMBOL_GPL(genhd_media_change_notify);
 #endif  /*  0  */
 
-struct find_block {
-	const char *name;
-	int part;
-};
-
-static int match_id(struct device *dev, void *data)
+dev_t blk_lookup_devt(const char *name, int partno)
 {
-	struct find_block *find = data;
-
-	if (dev->type != &disk_type)
-		return 0;
-	if (strcmp(dev->bus_id, find->name) == 0) {
-		struct gendisk *disk = dev_to_disk(dev);
-		if (find->part < disk->minors)
-			return 1;
-	}
-	return 0;
-}
-
-dev_t blk_lookup_devt(const char *name, int part)
-{
-	struct device *dev;
 	dev_t devt = MKDEV(0, 0);
-	struct find_block find;
+	struct class_dev_iter iter;
+	struct device *dev;
 
-	mutex_lock(&block_class_lock);
-	find.name = name;
-	find.part = part;
-	dev = class_find_device(&block_class, NULL, &find, match_id);
-	if (dev) {
-		put_device(dev);
-		devt = MKDEV(MAJOR(dev->devt),
-			     MINOR(dev->devt) + part);
+	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+	while ((dev = class_dev_iter_next(&iter))) {
+		struct gendisk *disk = dev_to_disk(dev);
+		struct hd_struct *part;
+
+		if (strcmp(dev->bus_id, name))
+			continue;
+
+		part = disk_get_part(disk, partno);
+		if (part) {
+			devt = part_devt(part);
+			disk_put_part(part);
+			break;
+		}
+		disk_put_part(part);
 	}
-	mutex_unlock(&block_class_lock);
-
+	class_dev_iter_exit(&iter);
 	return devt;
 }
 EXPORT_SYMBOL(blk_lookup_devt);
@@ -747,6 +1055,7 @@
 {
 	return alloc_disk_node(minors, -1);
 }
+EXPORT_SYMBOL(alloc_disk);
 
 struct gendisk *alloc_disk_node(int minors, int node_id)
 {
@@ -755,32 +1064,28 @@
 	disk = kmalloc_node(sizeof(struct gendisk),
 				GFP_KERNEL | __GFP_ZERO, node_id);
 	if (disk) {
-		if (!init_disk_stats(disk)) {
+		if (!init_part_stats(&disk->part0)) {
 			kfree(disk);
 			return NULL;
 		}
-		if (minors > 1) {
-			int size = (minors - 1) * sizeof(struct hd_struct *);
-			disk->part = kmalloc_node(size,
-				GFP_KERNEL | __GFP_ZERO, node_id);
-			if (!disk->part) {
-				free_disk_stats(disk);
-				kfree(disk);
-				return NULL;
-			}
+		if (disk_expand_part_tbl(disk, 0)) {
+			free_part_stats(&disk->part0);
+			kfree(disk);
+			return NULL;
 		}
+		disk->part_tbl->part[0] = &disk->part0;
+
 		disk->minors = minors;
 		rand_initialize_disk(disk);
-		disk->dev.class = &block_class;
-		disk->dev.type = &disk_type;
-		device_initialize(&disk->dev);
+		disk_to_dev(disk)->class = &block_class;
+		disk_to_dev(disk)->type = &disk_type;
+		device_initialize(disk_to_dev(disk));
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
+		disk->node_id = node_id;
 	}
 	return disk;
 }
-
-EXPORT_SYMBOL(alloc_disk);
 EXPORT_SYMBOL(alloc_disk_node);
 
 struct kobject *get_disk(struct gendisk *disk)
@@ -793,7 +1098,7 @@
 	owner = disk->fops->owner;
 	if (owner && !try_module_get(owner))
 		return NULL;
-	kobj = kobject_get(&disk->dev.kobj);
+	kobj = kobject_get(&disk_to_dev(disk)->kobj);
 	if (kobj == NULL) {
 		module_put(owner);
 		return NULL;
@@ -807,27 +1112,28 @@
 void put_disk(struct gendisk *disk)
 {
 	if (disk)
-		kobject_put(&disk->dev.kobj);
+		kobject_put(&disk_to_dev(disk)->kobj);
 }
 
 EXPORT_SYMBOL(put_disk);
 
 void set_device_ro(struct block_device *bdev, int flag)
 {
-	if (bdev->bd_contains != bdev)
-		bdev->bd_part->policy = flag;
-	else
-		bdev->bd_disk->policy = flag;
+	bdev->bd_part->policy = flag;
 }
 
 EXPORT_SYMBOL(set_device_ro);
 
 void set_disk_ro(struct gendisk *disk, int flag)
 {
-	int i;
-	disk->policy = flag;
-	for (i = 0; i < disk->minors - 1; i++)
-		if (disk->part[i]) disk->part[i]->policy = flag;
+	struct disk_part_iter piter;
+	struct hd_struct *part;
+
+	disk_part_iter_init(&piter, disk,
+			    DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
+	while ((part = disk_part_iter_next(&piter)))
+		part->policy = flag;
+	disk_part_iter_exit(&piter);
 }
 
 EXPORT_SYMBOL(set_disk_ro);
@@ -836,18 +1142,15 @@
 {
 	if (!bdev)
 		return 0;
-	else if (bdev->bd_contains != bdev)
-		return bdev->bd_part->policy;
-	else
-		return bdev->bd_disk->policy;
+	return bdev->bd_part->policy;
 }
 
 EXPORT_SYMBOL(bdev_read_only);
 
-int invalidate_partition(struct gendisk *disk, int index)
+int invalidate_partition(struct gendisk *disk, int partno)
 {
 	int res = 0;
-	struct block_device *bdev = bdget_disk(disk, index);
+	struct block_device *bdev = bdget_disk(disk, partno);
 	if (bdev) {
 		fsync_bdev(bdev);
 		res = __invalidate_device(bdev);
diff --git a/block/ioctl.c b/block/ioctl.c
index 77185e5..38bee321 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -12,11 +12,12 @@
 {
 	struct block_device *bdevp;
 	struct gendisk *disk;
+	struct hd_struct *part;
 	struct blkpg_ioctl_arg a;
 	struct blkpg_partition p;
+	struct disk_part_iter piter;
 	long long start, length;
-	int part;
-	int i;
+	int partno;
 	int err;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -28,8 +29,8 @@
 	disk = bdev->bd_disk;
 	if (bdev != bdev->bd_contains)
 		return -EINVAL;
-	part = p.pno;
-	if (part <= 0 || part >= disk->minors)
+	partno = p.pno;
+	if (partno <= 0)
 		return -EINVAL;
 	switch (a.op) {
 		case BLKPG_ADD_PARTITION:
@@ -43,36 +44,37 @@
 				    || pstart < 0 || plength < 0)
 					return -EINVAL;
 			}
-			/* partition number in use? */
-			mutex_lock(&bdev->bd_mutex);
-			if (disk->part[part - 1]) {
-				mutex_unlock(&bdev->bd_mutex);
-				return -EBUSY;
-			}
-			/* overlap? */
-			for (i = 0; i < disk->minors - 1; i++) {
-				struct hd_struct *s = disk->part[i];
 
-				if (!s)
-					continue;
-				if (!(start+length <= s->start_sect ||
-				      start >= s->start_sect + s->nr_sects)) {
+			mutex_lock(&bdev->bd_mutex);
+
+			/* overlap? */
+			disk_part_iter_init(&piter, disk,
+					    DISK_PITER_INCL_EMPTY);
+			while ((part = disk_part_iter_next(&piter))) {
+				if (!(start + length <= part->start_sect ||
+				      start >= part->start_sect + part->nr_sects)) {
+					disk_part_iter_exit(&piter);
 					mutex_unlock(&bdev->bd_mutex);
 					return -EBUSY;
 				}
 			}
+			disk_part_iter_exit(&piter);
+
 			/* all seems OK */
-			err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
+			err = add_partition(disk, partno, start, length,
+					    ADDPART_FLAG_NONE);
 			mutex_unlock(&bdev->bd_mutex);
 			return err;
 		case BLKPG_DEL_PARTITION:
-			if (!disk->part[part-1])
+			part = disk_get_part(disk, partno);
+			if (!part)
 				return -ENXIO;
-			if (disk->part[part - 1]->nr_sects == 0)
-				return -ENXIO;
-			bdevp = bdget_disk(disk, part);
+
+			bdevp = bdget(part_devt(part));
+			disk_put_part(part);
 			if (!bdevp)
 				return -ENOMEM;
+
 			mutex_lock(&bdevp->bd_mutex);
 			if (bdevp->bd_openers) {
 				mutex_unlock(&bdevp->bd_mutex);
@@ -84,7 +86,7 @@
 			invalidate_bdev(bdevp);
 
 			mutex_lock_nested(&bdev->bd_mutex, 1);
-			delete_partition(disk, part);
+			delete_partition(disk, partno);
 			mutex_unlock(&bdev->bd_mutex);
 			mutex_unlock(&bdevp->bd_mutex);
 			bdput(bdevp);
@@ -100,7 +102,7 @@
 	struct gendisk *disk = bdev->bd_disk;
 	int res;
 
-	if (disk->minors == 1 || bdev != bdev->bd_contains)
+	if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -111,6 +113,69 @@
 	return res;
 }
 
+static void blk_ioc_discard_endio(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+	complete(bio->bi_private);
+}
+
+static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
+			     uint64_t len)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	int ret = 0;
+
+	if (start & 511)
+		return -EINVAL;
+	if (len & 511)
+		return -EINVAL;
+	start >>= 9;
+	len >>= 9;
+
+	if (start + len > (bdev->bd_inode->i_size >> 9))
+		return -EINVAL;
+
+	if (!q->prepare_discard_fn)
+		return -EOPNOTSUPP;
+
+	while (len && !ret) {
+		DECLARE_COMPLETION_ONSTACK(wait);
+		struct bio *bio;
+
+		bio = bio_alloc(GFP_KERNEL, 0);
+		if (!bio)
+			return -ENOMEM;
+
+		bio->bi_end_io = blk_ioc_discard_endio;
+		bio->bi_bdev = bdev;
+		bio->bi_private = &wait;
+		bio->bi_sector = start;
+
+		if (len > q->max_hw_sectors) {
+			bio->bi_size = q->max_hw_sectors << 9;
+			len -= q->max_hw_sectors;
+			start += q->max_hw_sectors;
+		} else {
+			bio->bi_size = len << 9;
+			len = 0;
+		}
+		submit_bio(DISCARD_NOBARRIER, bio);
+
+		wait_for_completion(&wait);
+
+		if (bio_flagged(bio, BIO_EOPNOTSUPP))
+			ret = -EOPNOTSUPP;
+		else if (!bio_flagged(bio, BIO_UPTODATE))
+			ret = -EIO;
+		bio_put(bio);
+	}
+	return ret;
+}
+
 static int put_ushort(unsigned long arg, unsigned short val)
 {
 	return put_user(val, (unsigned short __user *)arg);
@@ -258,6 +323,19 @@
 		set_device_ro(bdev, n);
 		unlock_kernel();
 		return 0;
+
+	case BLKDISCARD: {
+		uint64_t range[2];
+
+		if (!(file->f_mode & FMODE_WRITE))
+			return -EBADF;
+
+		if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+			return -EFAULT;
+
+		return blk_ioctl_discard(bdev, range[0], range[1]);
+	}
+
 	case HDIO_GETGEO: {
 		struct hd_geometry geo;
 
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ec4b7f2..c34272a 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -185,6 +185,7 @@
 	__set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
 	__set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
 	__set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+	__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
 }
 EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
 
@@ -313,11 +314,12 @@
 			goto out;
 		}
 
-		ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count,
-					  hdr->dxfer_len);
+		ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count,
+					  hdr->dxfer_len, GFP_KERNEL);
 		kfree(iov);
 	} else if (hdr->dxfer_len)
-		ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len);
+		ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
+				      GFP_KERNEL);
 
 	if (ret)
 		goto out;
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 11c8c19..f17cd4b 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -663,7 +663,7 @@
 
 config PATA_PLATFORM
 	tristate "Generic platform device PATA support"
-	depends on EMBEDDED || ARCH_RPC || PPC || HAVE_PATA_PLATFORM
+	depends on EMBEDDED || PPC || HAVE_PATA_PLATFORM
 	help
 	  This option enables support for generic directly connected ATA
 	  devices commonly found on embedded systems.
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 2e1a7cb..aeadd00 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -267,8 +267,8 @@
 					 	 * per PM slot */
 };
 
-static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int ahci_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int ahci_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static unsigned int ahci_qc_issue(struct ata_queued_cmd *qc);
 static bool ahci_qc_fill_rtf(struct ata_queued_cmd *qc);
@@ -316,6 +316,7 @@
 
 static struct device_attribute *ahci_sdev_attrs[] = {
 	&dev_attr_sw_activity,
+	&dev_attr_unload_heads,
 	NULL
 };
 
@@ -820,10 +821,10 @@
 	return 0;
 }
 
-static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
-	void __iomem *port_mmio = ahci_port_base(ap);
-	int offset = ahci_scr_offset(ap, sc_reg);
+	void __iomem *port_mmio = ahci_port_base(link->ap);
+	int offset = ahci_scr_offset(link->ap, sc_reg);
 
 	if (offset) {
 		*val = readl(port_mmio + offset);
@@ -832,10 +833,10 @@
 	return -EINVAL;
 }
 
-static int ahci_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int ahci_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
-	void __iomem *port_mmio = ahci_port_base(ap);
-	int offset = ahci_scr_offset(ap, sc_reg);
+	void __iomem *port_mmio = ahci_port_base(link->ap);
+	int offset = ahci_scr_offset(link->ap, sc_reg);
 
 	if (offset) {
 		writel(val, port_mmio + offset);
@@ -973,7 +974,7 @@
 	writel(PORT_IRQ_PHYRDY, port_mmio + PORT_IRQ_STAT);
 
 	/* go ahead and clean out PhyRdy Change from Serror too */
-	ahci_scr_write(ap, SCR_ERROR, ((1 << 16) | (1 << 18)));
+	ahci_scr_write(&ap->link, SCR_ERROR, ((1 << 16) | (1 << 18)));
 
 	/*
  	 * Clear flag to indicate that we should ignore all PhyRdy
@@ -1937,8 +1938,8 @@
 	ata_ehi_push_desc(host_ehi, "irq_stat 0x%08x", irq_stat);
 
 	/* AHCI needs SError cleared; otherwise, it might lock up */
-	ahci_scr_read(ap, SCR_ERROR, &serror);
-	ahci_scr_write(ap, SCR_ERROR, serror);
+	ahci_scr_read(&ap->link, SCR_ERROR, &serror);
+	ahci_scr_write(&ap->link, SCR_ERROR, serror);
 	host_ehi->serror |= serror;
 
 	/* some controllers set IRQ_IF_ERR on device errors, ignore it */
@@ -2027,7 +2028,7 @@
 	if ((hpriv->flags & AHCI_HFLAG_NO_HOTPLUG) &&
 		(status & PORT_IRQ_PHYRDY)) {
 		status &= ~PORT_IRQ_PHYRDY;
-		ahci_scr_write(ap, SCR_ERROR, ((1 << 16) | (1 << 18)));
+		ahci_scr_write(&ap->link, SCR_ERROR, ((1 << 16) | (1 << 18)));
 	}
 
 	if (unlikely(status & PORT_IRQ_ERROR)) {
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index e6b4606..e9e32ed 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -165,8 +165,10 @@
 static void ich_set_dmamode(struct ata_port *ap, struct ata_device *adev);
 static int ich_pata_cable_detect(struct ata_port *ap);
 static u8 piix_vmw_bmdma_status(struct ata_port *ap);
-static int piix_sidpr_scr_read(struct ata_port *ap, unsigned int reg, u32 *val);
-static int piix_sidpr_scr_write(struct ata_port *ap, unsigned int reg, u32 val);
+static int piix_sidpr_scr_read(struct ata_link *link,
+			       unsigned int reg, u32 *val);
+static int piix_sidpr_scr_write(struct ata_link *link,
+				unsigned int reg, u32 val);
 #ifdef CONFIG_PM
 static int piix_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg);
 static int piix_pci_device_resume(struct pci_dev *pdev);
@@ -278,12 +280,15 @@
 	/* SATA Controller IDE (PCH) */
 	{ 0x8086, 0x3b20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
 	/* SATA Controller IDE (PCH) */
+	{ 0x8086, 0x3b21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
+	/* SATA Controller IDE (PCH) */
 	{ 0x8086, 0x3b26, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
 	/* SATA Controller IDE (PCH) */
+	{ 0x8086, 0x3b28, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
+	/* SATA Controller IDE (PCH) */
 	{ 0x8086, 0x3b2d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
 	/* SATA Controller IDE (PCH) */
 	{ 0x8086, 0x3b2e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
-
 	{ }	/* terminate list */
 };
 
@@ -582,6 +587,7 @@
 	{ 0x27DF, 0x1025, 0x0110 },	/* ICH7 on Acer 3682WLMi */
 	{ 0x27DF, 0x1043, 0x1267 },	/* ICH7 on Asus W5F */
 	{ 0x27DF, 0x103C, 0x30A1 },	/* ICH7 on HP Compaq nc2400 */
+	{ 0x27DF, 0x1071, 0xD221 },	/* ICH7 on Hercules EC-900 */
 	{ 0x24CA, 0x1025, 0x0061 },	/* ICH4 on ACER Aspire 2023WLMi */
 	{ 0x24CA, 0x1025, 0x003d },	/* ICH4 on ACER TM290 */
 	{ 0x266F, 0x1025, 0x0066 },	/* ICH6 on ACER Aspire 1694WLMi */
@@ -885,23 +891,9 @@
  * Serial ATA Index/Data Pair Superset Registers access
  *
  * Beginning from ICH8, there's a sane way to access SCRs using index
- * and data register pair located at BAR5.  This creates an
- * interesting problem of mapping two SCRs to one port.
- *
- * Although they have separate SCRs, the master and slave aren't
- * independent enough to be treated as separate links - e.g. softreset
- * resets both.  Also, there's no protocol defined for hard resetting
- * singled device sharing the virtual port (no defined way to acquire
- * device signature).  This is worked around by merging the SCR values
- * into one sensible value and requesting follow-up SRST after
- * hardreset.
- *
- * SCR merging is perfomed in nibbles which is the unit contents in
- * SCRs are organized.  If two values are equal, the value is used.
- * When they differ, merge table which lists precedence of possible
- * values is consulted and the first match or the last entry when
- * nothing matches is used.  When there's no merge table for the
- * specific nibble, value from the first port is used.
+ * and data register pair located at BAR5 which means that we have
+ * separate SCRs for master and slave.  This is handled using libata
+ * slave_link facility.
  */
 static const int piix_sidx_map[] = {
 	[SCR_STATUS]	= 0,
@@ -909,120 +901,38 @@
 	[SCR_CONTROL]	= 1,
 };
 
-static void piix_sidpr_sel(struct ata_device *dev, unsigned int reg)
+static void piix_sidpr_sel(struct ata_link *link, unsigned int reg)
 {
-	struct ata_port *ap = dev->link->ap;
+	struct ata_port *ap = link->ap;
 	struct piix_host_priv *hpriv = ap->host->private_data;
 
-	iowrite32(((ap->port_no * 2 + dev->devno) << 8) | piix_sidx_map[reg],
+	iowrite32(((ap->port_no * 2 + link->pmp) << 8) | piix_sidx_map[reg],
 		  hpriv->sidpr + PIIX_SIDPR_IDX);
 }
 
-static int piix_sidpr_read(struct ata_device *dev, unsigned int reg)
+static int piix_sidpr_scr_read(struct ata_link *link,
+			       unsigned int reg, u32 *val)
 {
-	struct piix_host_priv *hpriv = dev->link->ap->host->private_data;
-
-	piix_sidpr_sel(dev, reg);
-	return ioread32(hpriv->sidpr + PIIX_SIDPR_DATA);
-}
-
-static void piix_sidpr_write(struct ata_device *dev, unsigned int reg, u32 val)
-{
-	struct piix_host_priv *hpriv = dev->link->ap->host->private_data;
-
-	piix_sidpr_sel(dev, reg);
-	iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA);
-}
-
-static u32 piix_merge_scr(u32 val0, u32 val1, const int * const *merge_tbl)
-{
-	u32 val = 0;
-	int i, mi;
-
-	for (i = 0, mi = 0; i < 32 / 4; i++) {
-		u8 c0 = (val0 >> (i * 4)) & 0xf;
-		u8 c1 = (val1 >> (i * 4)) & 0xf;
-		u8 merged = c0;
-		const int *cur;
-
-		/* if no merge preference, assume the first value */
-		cur = merge_tbl[mi];
-		if (!cur)
-			goto done;
-		mi++;
-
-		/* if two values equal, use it */
-		if (c0 == c1)
-			goto done;
-
-		/* choose the first match or the last from the merge table */
-		while (*cur != -1) {
-			if (c0 == *cur || c1 == *cur)
-				break;
-			cur++;
-		}
-		if (*cur == -1)
-			cur--;
-		merged = *cur;
-	done:
-		val |= merged << (i * 4);
-	}
-
-	return val;
-}
-
-static int piix_sidpr_scr_read(struct ata_port *ap, unsigned int reg, u32 *val)
-{
-	const int * const sstatus_merge_tbl[] = {
-		/* DET */ (const int []){ 1, 3, 0, 4, 3, -1 },
-		/* SPD */ (const int []){ 2, 1, 0, -1 },
-		/* IPM */ (const int []){ 6, 2, 1, 0, -1 },
-		NULL,
-	};
-	const int * const scontrol_merge_tbl[] = {
-		/* DET */ (const int []){ 1, 0, 4, 0, -1 },
-		/* SPD */ (const int []){ 0, 2, 1, 0, -1 },
-		/* IPM */ (const int []){ 0, 1, 2, 3, 0, -1 },
-		NULL,
-	};
-	u32 v0, v1;
+	struct piix_host_priv *hpriv = link->ap->host->private_data;
 
 	if (reg >= ARRAY_SIZE(piix_sidx_map))
 		return -EINVAL;
 
-	if (!(ap->flags & ATA_FLAG_SLAVE_POSS)) {
-		*val = piix_sidpr_read(&ap->link.device[0], reg);
-		return 0;
-	}
-
-	v0 = piix_sidpr_read(&ap->link.device[0], reg);
-	v1 = piix_sidpr_read(&ap->link.device[1], reg);
-
-	switch (reg) {
-	case SCR_STATUS:
-		*val = piix_merge_scr(v0, v1, sstatus_merge_tbl);
-		break;
-	case SCR_ERROR:
-		*val = v0 | v1;
-		break;
-	case SCR_CONTROL:
-		*val = piix_merge_scr(v0, v1, scontrol_merge_tbl);
-		break;
-	}
-
+	piix_sidpr_sel(link, reg);
+	*val = ioread32(hpriv->sidpr + PIIX_SIDPR_DATA);
 	return 0;
 }
 
-static int piix_sidpr_scr_write(struct ata_port *ap, unsigned int reg, u32 val)
+static int piix_sidpr_scr_write(struct ata_link *link,
+				unsigned int reg, u32 val)
 {
+	struct piix_host_priv *hpriv = link->ap->host->private_data;
+
 	if (reg >= ARRAY_SIZE(piix_sidx_map))
 		return -EINVAL;
 
-	piix_sidpr_write(&ap->link.device[0], reg, val);
-
-	if (ap->flags & ATA_FLAG_SLAVE_POSS)
-		piix_sidpr_write(&ap->link.device[1], reg, val);
-
+	piix_sidpr_sel(link, reg);
+	iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA);
 	return 0;
 }
 
@@ -1363,28 +1273,28 @@
 	return map;
 }
 
-static void __devinit piix_init_sidpr(struct ata_host *host)
+static int __devinit piix_init_sidpr(struct ata_host *host)
 {
 	struct pci_dev *pdev = to_pci_dev(host->dev);
 	struct piix_host_priv *hpriv = host->private_data;
-	struct ata_device *dev0 = &host->ports[0]->link.device[0];
+	struct ata_link *link0 = &host->ports[0]->link;
 	u32 scontrol;
-	int i;
+	int i, rc;
 
 	/* check for availability */
 	for (i = 0; i < 4; i++)
 		if (hpriv->map[i] == IDE)
-			return;
+			return 0;
 
 	if (!(host->ports[0]->flags & PIIX_FLAG_SIDPR))
-		return;
+		return 0;
 
 	if (pci_resource_start(pdev, PIIX_SIDPR_BAR) == 0 ||
 	    pci_resource_len(pdev, PIIX_SIDPR_BAR) != PIIX_SIDPR_LEN)
-		return;
+		return 0;
 
 	if (pcim_iomap_regions(pdev, 1 << PIIX_SIDPR_BAR, DRV_NAME))
-		return;
+		return 0;
 
 	hpriv->sidpr = pcim_iomap_table(pdev)[PIIX_SIDPR_BAR];
 
@@ -1392,7 +1302,7 @@
 	 * Give it a test drive by inhibiting power save modes which
 	 * we'll do anyway.
 	 */
-	scontrol = piix_sidpr_read(dev0, SCR_CONTROL);
+	piix_sidpr_scr_read(link0, SCR_CONTROL, &scontrol);
 
 	/* if IPM is already 3, SCR access is probably working.  Don't
 	 * un-inhibit power save modes as BIOS might have inhibited
@@ -1400,18 +1310,30 @@
 	 */
 	if ((scontrol & 0xf00) != 0x300) {
 		scontrol |= 0x300;
-		piix_sidpr_write(dev0, SCR_CONTROL, scontrol);
-		scontrol = piix_sidpr_read(dev0, SCR_CONTROL);
+		piix_sidpr_scr_write(link0, SCR_CONTROL, scontrol);
+		piix_sidpr_scr_read(link0, SCR_CONTROL, &scontrol);
 
 		if ((scontrol & 0xf00) != 0x300) {
 			dev_printk(KERN_INFO, host->dev, "SCR access via "
 				   "SIDPR is available but doesn't work\n");
-			return;
+			return 0;
 		}
 	}
 
-	host->ports[0]->ops = &piix_sidpr_sata_ops;
-	host->ports[1]->ops = &piix_sidpr_sata_ops;
+	/* okay, SCRs available, set ops and ask libata for slave_link */
+	for (i = 0; i < 2; i++) {
+		struct ata_port *ap = host->ports[i];
+
+		ap->ops = &piix_sidpr_sata_ops;
+
+		if (ap->flags & ATA_FLAG_SLAVE_POSS) {
+			rc = ata_slave_link_init(ap);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
 }
 
 static void piix_iocfg_bit18_quirk(struct pci_dev *pdev)
@@ -1521,7 +1443,9 @@
 	/* initialize controller */
 	if (port_flags & ATA_FLAG_SATA) {
 		piix_init_pcs(host, piix_map_db_table[ent->driver_data]);
-		piix_init_sidpr(host);
+		rc = piix_init_sidpr(host);
+		if (rc)
+			return rc;
 	}
 
 	/* apply IOCFG bit18 quirk */
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 79e3a8e..1ee9499 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -163,6 +163,67 @@
 MODULE_VERSION(DRV_VERSION);
 
 
+/*
+ * Iterator helpers.  Don't use directly.
+ *
+ * LOCKING:
+ * Host lock or EH context.
+ */
+struct ata_link *__ata_port_next_link(struct ata_port *ap,
+				      struct ata_link *link, bool dev_only)
+{
+	/* NULL link indicates start of iteration */
+	if (!link) {
+		if (dev_only && sata_pmp_attached(ap))
+			return ap->pmp_link;
+		return &ap->link;
+	}
+
+	/* we just iterated over the host master link, what's next? */
+	if (link == &ap->link) {
+		if (!sata_pmp_attached(ap)) {
+			if (unlikely(ap->slave_link) && !dev_only)
+				return ap->slave_link;
+			return NULL;
+		}
+		return ap->pmp_link;
+	}
+
+	/* slave_link excludes PMP */
+	if (unlikely(link == ap->slave_link))
+		return NULL;
+
+	/* iterate to the next PMP link */
+	if (++link < ap->pmp_link + ap->nr_pmp_links)
+		return link;
+	return NULL;
+}
+
+/**
+ *	ata_dev_phys_link - find physical link for a device
+ *	@dev: ATA device to look up physical link for
+ *
+ *	Look up physical link which @dev is attached to.  Note that
+ *	this is different from @dev->link only when @dev is on slave
+ *	link.  For all other cases, it's the same as @dev->link.
+ *
+ *	LOCKING:
+ *	Don't care.
+ *
+ *	RETURNS:
+ *	Pointer to the found physical link.
+ */
+struct ata_link *ata_dev_phys_link(struct ata_device *dev)
+{
+	struct ata_port *ap = dev->link->ap;
+
+	if (!ap->slave_link)
+		return dev->link;
+	if (!dev->devno)
+		return &ap->link;
+	return ap->slave_link;
+}
+
 /**
  *	ata_force_cbl - force cable type according to libata.force
  *	@ap: ATA port of interest
@@ -206,7 +267,8 @@
  *	the host link and all fan-out ports connected via PMP.  If the
  *	device part is specified as 0 (e.g. 1.00:), it specifies the
  *	first fan-out link not the host link.  Device number 15 always
- *	points to the host link whether PMP is attached or not.
+ *	points to the host link whether PMP is attached or not.  If the
+ *	controller has slave link, device number 16 points to it.
  *
  *	LOCKING:
  *	EH context.
@@ -214,12 +276,11 @@
 static void ata_force_link_limits(struct ata_link *link)
 {
 	bool did_spd = false;
-	int linkno, i;
+	int linkno = link->pmp;
+	int i;
 
 	if (ata_is_host_link(link))
-		linkno = 15;
-	else
-		linkno = link->pmp;
+		linkno += 15;
 
 	for (i = ata_force_tbl_size - 1; i >= 0; i--) {
 		const struct ata_force_ent *fe = &ata_force_tbl[i];
@@ -266,9 +327,9 @@
 	int alt_devno = devno;
 	int i;
 
-	/* allow n.15 for the first device attached to host port */
-	if (ata_is_host_link(dev->link) && devno == 0)
-		alt_devno = 15;
+	/* allow n.15/16 for devices attached to host port */
+	if (ata_is_host_link(dev->link))
+		alt_devno += 15;
 
 	for (i = ata_force_tbl_size - 1; i >= 0; i--) {
 		const struct ata_force_ent *fe = &ata_force_tbl[i];
@@ -320,9 +381,9 @@
 	int alt_devno = devno;
 	int i;
 
-	/* allow n.15 for the first device attached to host port */
-	if (ata_is_host_link(dev->link) && devno == 0)
-		alt_devno = 15;
+	/* allow n.15/16 for devices attached to host port */
+	if (ata_is_host_link(dev->link))
+		alt_devno += 15;
 
 	for (i = 0; i < ata_force_tbl_size; i++) {
 		const struct ata_force_ent *fe = &ata_force_tbl[i];
@@ -2681,7 +2742,7 @@
 		return;
 	sata_scr_read(link, SCR_CONTROL, &scontrol);
 
-	if (ata_link_online(link)) {
+	if (ata_phys_link_online(link)) {
 		tmp = (sstatus >> 4) & 0xf;
 		ata_link_printk(link, KERN_INFO,
 				"SATA link up %s (SStatus %X SControl %X)\n",
@@ -3372,6 +3433,12 @@
 	unsigned long nodev_deadline = ata_deadline(start, ATA_TMOUT_FF_WAIT);
 	int warned = 0;
 
+	/* Slave readiness can't be tested separately from master.  On
+	 * M/S emulation configuration, this function should be called
+	 * only on the master and it will handle both master and slave.
+	 */
+	WARN_ON(link == link->ap->slave_link);
+
 	if (time_after(nodev_deadline, deadline))
 		nodev_deadline = deadline;
 
@@ -3593,7 +3660,7 @@
 	}
 
 	/* no point in trying softreset on offline link */
-	if (ata_link_offline(link))
+	if (ata_phys_link_offline(link))
 		ehc->i.action &= ~ATA_EH_SOFTRESET;
 
 	return 0;
@@ -3671,7 +3738,7 @@
 	if (rc)
 		goto out;
 	/* if link is offline nothing more to do */
-	if (ata_link_offline(link))
+	if (ata_phys_link_offline(link))
 		goto out;
 
 	/* Link is online.  From this point, -ENODEV too is an error. */
@@ -4868,10 +4935,8 @@
 int sata_scr_read(struct ata_link *link, int reg, u32 *val)
 {
 	if (ata_is_host_link(link)) {
-		struct ata_port *ap = link->ap;
-
 		if (sata_scr_valid(link))
-			return ap->ops->scr_read(ap, reg, val);
+			return link->ap->ops->scr_read(link, reg, val);
 		return -EOPNOTSUPP;
 	}
 
@@ -4897,10 +4962,8 @@
 int sata_scr_write(struct ata_link *link, int reg, u32 val)
 {
 	if (ata_is_host_link(link)) {
-		struct ata_port *ap = link->ap;
-
 		if (sata_scr_valid(link))
-			return ap->ops->scr_write(ap, reg, val);
+			return link->ap->ops->scr_write(link, reg, val);
 		return -EOPNOTSUPP;
 	}
 
@@ -4925,13 +4988,12 @@
 int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
 {
 	if (ata_is_host_link(link)) {
-		struct ata_port *ap = link->ap;
 		int rc;
 
 		if (sata_scr_valid(link)) {
-			rc = ap->ops->scr_write(ap, reg, val);
+			rc = link->ap->ops->scr_write(link, reg, val);
 			if (rc == 0)
-				rc = ap->ops->scr_read(ap, reg, &val);
+				rc = link->ap->ops->scr_read(link, reg, &val);
 			return rc;
 		}
 		return -EOPNOTSUPP;
@@ -4941,7 +5003,7 @@
 }
 
 /**
- *	ata_link_online - test whether the given link is online
+ *	ata_phys_link_online - test whether the given link is online
  *	@link: ATA link to test
  *
  *	Test whether @link is online.  Note that this function returns
@@ -4952,20 +5014,20 @@
  *	None.
  *
  *	RETURNS:
- *	1 if the port online status is available and online.
+ *	True if the port online status is available and online.
  */
-int ata_link_online(struct ata_link *link)
+bool ata_phys_link_online(struct ata_link *link)
 {
 	u32 sstatus;
 
 	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0 &&
 	    (sstatus & 0xf) == 0x3)
-		return 1;
-	return 0;
+		return true;
+	return false;
 }
 
 /**
- *	ata_link_offline - test whether the given link is offline
+ *	ata_phys_link_offline - test whether the given link is offline
  *	@link: ATA link to test
  *
  *	Test whether @link is offline.  Note that this function
@@ -4976,16 +5038,68 @@
  *	None.
  *
  *	RETURNS:
- *	1 if the port offline status is available and offline.
+ *	True if the port offline status is available and offline.
  */
-int ata_link_offline(struct ata_link *link)
+bool ata_phys_link_offline(struct ata_link *link)
 {
 	u32 sstatus;
 
 	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0 &&
 	    (sstatus & 0xf) != 0x3)
-		return 1;
-	return 0;
+		return true;
+	return false;
+}
+
+/**
+ *	ata_link_online - test whether the given link is online
+ *	@link: ATA link to test
+ *
+ *	Test whether @link is online.  This is identical to
+ *	ata_phys_link_online() when there's no slave link.  When
+ *	there's a slave link, this function should only be called on
+ *	the master link and will return true if any of M/S links is
+ *	online.
+ *
+ *	LOCKING:
+ *	None.
+ *
+ *	RETURNS:
+ *	True if the port online status is available and online.
+ */
+bool ata_link_online(struct ata_link *link)
+{
+	struct ata_link *slave = link->ap->slave_link;
+
+	WARN_ON(link == slave);	/* shouldn't be called on slave link */
+
+	return ata_phys_link_online(link) ||
+		(slave && ata_phys_link_online(slave));
+}
+
+/**
+ *	ata_link_offline - test whether the given link is offline
+ *	@link: ATA link to test
+ *
+ *	Test whether @link is offline.  This is identical to
+ *	ata_phys_link_offline() when there's no slave link.  When
+ *	there's a slave link, this function should only be called on
+ *	the master link and will return true if both M/S links are
+ *	offline.
+ *
+ *	LOCKING:
+ *	None.
+ *
+ *	RETURNS:
+ *	True if the port offline status is available and offline.
+ */
+bool ata_link_offline(struct ata_link *link)
+{
+	struct ata_link *slave = link->ap->slave_link;
+
+	WARN_ON(link == slave);	/* shouldn't be called on slave link */
+
+	return ata_phys_link_offline(link) &&
+		(!slave || ata_phys_link_offline(slave));
 }
 
 #ifdef CONFIG_PM
@@ -5127,11 +5241,11 @@
  */
 void ata_dev_init(struct ata_device *dev)
 {
-	struct ata_link *link = dev->link;
+	struct ata_link *link = ata_dev_phys_link(dev);
 	struct ata_port *ap = link->ap;
 	unsigned long flags;
 
-	/* SATA spd limit is bound to the first device */
+	/* SATA spd limit is bound to the attached device, reset together */
 	link->sata_spd_limit = link->hw_sata_spd_limit;
 	link->sata_spd = 0;
 
@@ -5264,6 +5378,7 @@
 	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
 	INIT_LIST_HEAD(&ap->eh_done_q);
 	init_waitqueue_head(&ap->eh_wait_q);
+	init_completion(&ap->park_req_pending);
 	init_timer_deferrable(&ap->fastdrain_timer);
 	ap->fastdrain_timer.function = ata_eh_fastdrain_timerfn;
 	ap->fastdrain_timer.data = (unsigned long)ap;
@@ -5294,6 +5409,7 @@
 			scsi_host_put(ap->scsi_host);
 
 		kfree(ap->pmp_link);
+		kfree(ap->slave_link);
 		kfree(ap);
 		host->ports[i] = NULL;
 	}
@@ -5414,6 +5530,68 @@
 	return host;
 }
 
+/**
+ *	ata_slave_link_init - initialize slave link
+ *	@ap: port to initialize slave link for
+ *
+ *	Create and initialize slave link for @ap.  This enables slave
+ *	link handling on the port.
+ *
+ *	In libata, a port contains links and a link contains devices.
+ *	There is single host link but if a PMP is attached to it,
+ *	there can be multiple fan-out links.  On SATA, there's usually
+ *	a single device connected to a link but PATA and SATA
+ *	controllers emulating TF based interface can have two - master
+ *	and slave.
+ *
+ *	However, there are a few controllers which don't fit into this
+ *	abstraction too well - SATA controllers which emulate TF
+ *	interface with both master and slave devices but also have
+ *	separate SCR register sets for each device.  These controllers
+ *	need separate links for physical link handling
+ *	(e.g. onlineness, link speed) but should be treated like a
+ *	traditional M/S controller for everything else (e.g. command
+ *	issue, softreset).
+ *
+ *	slave_link is libata's way of handling this class of
+ *	controllers without impacting core layer too much.  For
+ *	anything other than physical link handling, the default host
+ *	link is used for both master and slave.  For physical link
+ *	handling, separate @ap->slave_link is used.  All dirty details
+ *	are implemented inside libata core layer.  From LLD's POV, the
+ *	only difference is that prereset, hardreset and postreset are
+ *	called once more for the slave link, so the reset sequence
+ *	looks like the following.
+ *
+ *	prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) ->
+ *	softreset(M) -> postreset(M) -> postreset(S)
+ *
+ *	Note that softreset is called only for the master.  Softreset
+ *	resets both M/S by definition, so SRST on master should handle
+ *	both (the standard method will work just fine).
+ *
+ *	LOCKING:
+ *	Should be called before host is registered.
+ *
+ *	RETURNS:
+ *	0 on success, -errno on failure.
+ */
+int ata_slave_link_init(struct ata_port *ap)
+{
+	struct ata_link *link;
+
+	WARN_ON(ap->slave_link);
+	WARN_ON(ap->flags & ATA_FLAG_PMP);
+
+	link = kzalloc(sizeof(*link), GFP_KERNEL);
+	if (!link)
+		return -ENOMEM;
+
+	ata_link_init(ap, link, 1);
+	ap->slave_link = link;
+	return 0;
+}
+
 static void ata_host_stop(struct device *gendev, void *res)
 {
 	struct ata_host *host = dev_get_drvdata(gendev);
@@ -5640,6 +5818,8 @@
 
 		/* init sata_spd_limit to the current value */
 		sata_link_init_spd(&ap->link);
+		if (ap->slave_link)
+			sata_link_init_spd(ap->slave_link);
 
 		/* print per-port info to dmesg */
 		xfer_mask = ata_pack_xfermask(ap->pio_mask, ap->mwdma_mask,
@@ -6260,10 +6440,12 @@
 EXPORT_SYMBOL_GPL(sata_port_ops);
 EXPORT_SYMBOL_GPL(ata_dummy_port_ops);
 EXPORT_SYMBOL_GPL(ata_dummy_port_info);
+EXPORT_SYMBOL_GPL(__ata_port_next_link);
 EXPORT_SYMBOL_GPL(ata_std_bios_param);
 EXPORT_SYMBOL_GPL(ata_host_init);
 EXPORT_SYMBOL_GPL(ata_host_alloc);
 EXPORT_SYMBOL_GPL(ata_host_alloc_pinfo);
+EXPORT_SYMBOL_GPL(ata_slave_link_init);
 EXPORT_SYMBOL_GPL(ata_host_start);
 EXPORT_SYMBOL_GPL(ata_host_register);
 EXPORT_SYMBOL_GPL(ata_host_activate);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index c1db2f2..a93247c 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -33,6 +33,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/blkdev.h>
 #include <linux/pci.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
@@ -79,6 +80,8 @@
 	 */
 	ATA_EH_PRERESET_TIMEOUT		= 10000,
 	ATA_EH_FASTDRAIN_INTERVAL	=  3000,
+
+	ATA_EH_UA_TRIES			= 5,
 };
 
 /* The following table determines how we sequence resets.  Each entry
@@ -457,29 +460,29 @@
  *	RETURNS:
  *	EH_HANDLED or EH_NOT_HANDLED
  */
-enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
+enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct ata_port *ap = ata_shost_to_port(host);
 	unsigned long flags;
 	struct ata_queued_cmd *qc;
-	enum scsi_eh_timer_return ret;
+	enum blk_eh_timer_return ret;
 
 	DPRINTK("ENTER\n");
 
 	if (ap->ops->error_handler) {
-		ret = EH_NOT_HANDLED;
+		ret = BLK_EH_NOT_HANDLED;
 		goto out;
 	}
 
-	ret = EH_HANDLED;
+	ret = BLK_EH_HANDLED;
 	spin_lock_irqsave(ap->lock, flags);
 	qc = ata_qc_from_tag(ap, ap->link.active_tag);
 	if (qc) {
 		WARN_ON(qc->scsicmd != cmd);
 		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
 		qc->err_mask |= AC_ERR_TIMEOUT;
-		ret = EH_NOT_HANDLED;
+		ret = BLK_EH_NOT_HANDLED;
 	}
 	spin_unlock_irqrestore(ap->lock, flags);
 
@@ -831,7 +834,7 @@
 	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
 	 * this function completes.
 	 */
-	scsi_req_abort_cmd(qc->scsicmd);
+	blk_abort_request(qc->scsicmd->request);
 }
 
 /**
@@ -1357,6 +1360,37 @@
 }
 
 /**
+ *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
+ *	@dev: target ATAPI device
+ *	@r_sense_key: out parameter for sense_key
+ *
+ *	Perform ATAPI TEST_UNIT_READY.
+ *
+ *	LOCKING:
+ *	EH context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, AC_ERR_* mask on failure.
+ */
+static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
+{
+	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
+	struct ata_taskfile tf;
+	unsigned int err_mask;
+
+	ata_tf_init(dev, &tf);
+
+	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf.command = ATA_CMD_PACKET;
+	tf.protocol = ATAPI_PROT_NODATA;
+
+	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
+	if (err_mask == AC_ERR_DEV)
+		*r_sense_key = tf.feature >> 4;
+	return err_mask;
+}
+
+/**
  *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
  *	@dev: device to perform REQUEST_SENSE to
  *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
@@ -1756,7 +1790,7 @@
 static unsigned int ata_eh_speed_down(struct ata_device *dev,
 				unsigned int eflags, unsigned int err_mask)
 {
-	struct ata_link *link = dev->link;
+	struct ata_link *link = ata_dev_phys_link(dev);
 	int xfer_ok = 0;
 	unsigned int verdict;
 	unsigned int action = 0;
@@ -1880,7 +1914,8 @@
 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
 
-		if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link)
+		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
+		    ata_dev_phys_link(qc->dev) != link)
 			continue;
 
 		/* inherit upper level err_mask */
@@ -1967,6 +2002,23 @@
 	ata_port_for_each_link(link, ap)
 		ata_eh_link_autopsy(link);
 
+	/* Handle the frigging slave link.  Autopsy is done similarly
+	 * but actions and flags are transferred over to the master
+	 * link and handled from there.
+	 */
+	if (ap->slave_link) {
+		struct ata_eh_context *mehc = &ap->link.eh_context;
+		struct ata_eh_context *sehc = &ap->slave_link->eh_context;
+
+		ata_eh_link_autopsy(ap->slave_link);
+
+		ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
+		mehc->i.action		|= sehc->i.action;
+		mehc->i.dev_action[1]	|= sehc->i.dev_action[1];
+		mehc->i.flags		|= sehc->i.flags;
+		ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
+	}
+
 	/* Autopsy of fanout ports can affect host link autopsy.
 	 * Perform host link autopsy last.
 	 */
@@ -2001,7 +2053,8 @@
 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
 
-		if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link ||
+		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
+		    ata_dev_phys_link(qc->dev) != link ||
 		    ((qc->flags & ATA_QCFLAG_QUIET) &&
 		     qc->err_mask == AC_ERR_DEV))
 			continue;
@@ -2068,7 +2121,7 @@
 		char cdb_buf[70] = "";
 
 		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
-		    qc->dev->link != link || !qc->err_mask)
+		    ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
 			continue;
 
 		if (qc->dma_dir != DMA_NONE) {
@@ -2160,12 +2213,14 @@
 }
 
 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
-			unsigned int *classes, unsigned long deadline)
+			unsigned int *classes, unsigned long deadline,
+			bool clear_classes)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link)
-		classes[dev->devno] = ATA_DEV_UNKNOWN;
+	if (clear_classes)
+		ata_link_for_each_dev(dev, link)
+			classes[dev->devno] = ATA_DEV_UNKNOWN;
 
 	return reset(link, classes, deadline);
 }
@@ -2187,17 +2242,20 @@
 		 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
 {
 	struct ata_port *ap = link->ap;
+	struct ata_link *slave = ap->slave_link;
 	struct ata_eh_context *ehc = &link->eh_context;
+	struct ata_eh_context *sehc = &slave->eh_context;
 	unsigned int *classes = ehc->classes;
 	unsigned int lflags = link->flags;
 	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
 	int max_tries = 0, try = 0;
+	struct ata_link *failed_link;
 	struct ata_device *dev;
 	unsigned long deadline, now;
 	ata_reset_fn_t reset;
 	unsigned long flags;
 	u32 sstatus;
-	int nr_known, rc;
+	int nr_unknown, rc;
 
 	/*
 	 * Prepare to reset
@@ -2252,8 +2310,30 @@
 	}
 
 	if (prereset) {
-		rc = prereset(link,
-			      ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT));
+		unsigned long deadline = ata_deadline(jiffies,
+						      ATA_EH_PRERESET_TIMEOUT);
+
+		if (slave) {
+			sehc->i.action &= ~ATA_EH_RESET;
+			sehc->i.action |= ehc->i.action;
+		}
+
+		rc = prereset(link, deadline);
+
+		/* If present, do prereset on slave link too.  Reset
+		 * is skipped iff both master and slave links report
+		 * -ENOENT or clear ATA_EH_RESET.
+		 */
+		if (slave && (rc == 0 || rc == -ENOENT)) {
+			int tmp;
+
+			tmp = prereset(slave, deadline);
+			if (tmp != -ENOENT)
+				rc = tmp;
+
+			ehc->i.action |= sehc->i.action;
+		}
+
 		if (rc) {
 			if (rc == -ENOENT) {
 				ata_link_printk(link, KERN_DEBUG,
@@ -2302,25 +2382,51 @@
 		else
 			ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
 
-		rc = ata_do_reset(link, reset, classes, deadline);
-		if (rc && rc != -EAGAIN)
+		rc = ata_do_reset(link, reset, classes, deadline, true);
+		if (rc && rc != -EAGAIN) {
+			failed_link = link;
 			goto fail;
+		}
 
+		/* hardreset slave link if existent */
+		if (slave && reset == hardreset) {
+			int tmp;
+
+			if (verbose)
+				ata_link_printk(slave, KERN_INFO,
+						"hard resetting link\n");
+
+			ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
+			tmp = ata_do_reset(slave, reset, classes, deadline,
+					   false);
+			switch (tmp) {
+			case -EAGAIN:
+				rc = -EAGAIN;
+			case 0:
+				break;
+			default:
+				failed_link = slave;
+				rc = tmp;
+				goto fail;
+			}
+		}
+
+		/* perform follow-up SRST if necessary */
 		if (reset == hardreset &&
 		    ata_eh_followup_srst_needed(link, rc, classes)) {
-			/* okay, let's do follow-up softreset */
 			reset = softreset;
 
 			if (!reset) {
 				ata_link_printk(link, KERN_ERR,
 						"follow-up softreset required "
 						"but no softreset avaliable\n");
+				failed_link = link;
 				rc = -EINVAL;
 				goto fail;
 			}
 
 			ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
-			rc = ata_do_reset(link, reset, classes, deadline);
+			rc = ata_do_reset(link, reset, classes, deadline, true);
 		}
 	} else {
 		if (verbose)
@@ -2341,7 +2447,7 @@
 		dev->pio_mode = XFER_PIO_0;
 		dev->flags &= ~ATA_DFLAG_SLEEPING;
 
-		if (ata_link_offline(link))
+		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
 			continue;
 
 		/* apply class override */
@@ -2354,6 +2460,8 @@
 	/* record current link speed */
 	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
 		link->sata_spd = (sstatus >> 4) & 0xf;
+	if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
+		slave->sata_spd = (sstatus >> 4) & 0xf;
 
 	/* thaw the port */
 	if (ata_is_host_link(link))
@@ -2366,12 +2474,17 @@
 	 * reset and here.  This race is mediated by cross checking
 	 * link onlineness and classification result later.
 	 */
-	if (postreset)
+	if (postreset) {
 		postreset(link, classes);
+		if (slave)
+			postreset(slave, classes);
+	}
 
 	/* clear cached SError */
 	spin_lock_irqsave(link->ap->lock, flags);
 	link->eh_info.serror = 0;
+	if (slave)
+		slave->eh_info.serror = 0;
 	spin_unlock_irqrestore(link->ap->lock, flags);
 
 	/* Make sure onlineness and classification result correspond.
@@ -2381,19 +2494,21 @@
 	 * link onlineness and classification result, those conditions
 	 * can be reliably detected and retried.
 	 */
-	nr_known = 0;
+	nr_unknown = 0;
 	ata_link_for_each_dev(dev, link) {
 		/* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
-		if (classes[dev->devno] == ATA_DEV_UNKNOWN)
+		if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
 			classes[dev->devno] = ATA_DEV_NONE;
-		else
-			nr_known++;
+			if (ata_phys_link_online(ata_dev_phys_link(dev)))
+				nr_unknown++;
+		}
 	}
 
-	if (classify && !nr_known && ata_link_online(link)) {
+	if (classify && nr_unknown) {
 		if (try < max_tries) {
 			ata_link_printk(link, KERN_WARNING, "link online but "
 				       "device misclassified, retrying\n");
+			failed_link = link;
 			rc = -EAGAIN;
 			goto fail;
 		}
@@ -2404,6 +2519,8 @@
 
 	/* reset successful, schedule revalidation */
 	ata_eh_done(link, NULL, ATA_EH_RESET);
+	if (slave)
+		ata_eh_done(slave, NULL, ATA_EH_RESET);
 	ehc->last_reset = jiffies;
 	ehc->i.action |= ATA_EH_REVALIDATE;
 
@@ -2411,6 +2528,8 @@
  out:
 	/* clear hotplug flag */
 	ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
+	if (slave)
+		sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
 
 	spin_lock_irqsave(ap->lock, flags);
 	ap->pflags &= ~ATA_PFLAG_RESETTING;
@@ -2431,7 +2550,7 @@
 	if (time_before(now, deadline)) {
 		unsigned long delta = deadline - now;
 
-		ata_link_printk(link, KERN_WARNING,
+		ata_link_printk(failed_link, KERN_WARNING,
 			"reset failed (errno=%d), retrying in %u secs\n",
 			rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
 
@@ -2439,13 +2558,92 @@
 			delta = schedule_timeout_uninterruptible(delta);
 	}
 
-	if (rc == -EPIPE || try == max_tries - 1)
+	if (try == max_tries - 1) {
 		sata_down_spd_limit(link);
+		if (slave)
+			sata_down_spd_limit(slave);
+	} else if (rc == -EPIPE)
+		sata_down_spd_limit(failed_link);
+
 	if (hardreset)
 		reset = hardreset;
 	goto retry;
 }
 
+static inline void ata_eh_pull_park_action(struct ata_port *ap)
+{
+	struct ata_link *link;
+	struct ata_device *dev;
+	unsigned long flags;
+
+	/*
+	 * This function can be thought of as an extended version of
+	 * ata_eh_about_to_do() specially crafted to accommodate the
+	 * requirements of ATA_EH_PARK handling. Since the EH thread
+	 * does not leave the do {} while () loop in ata_eh_recover as
+	 * long as the timeout for a park request to *one* device on
+	 * the port has not expired, and since we still want to pick
+	 * up park requests to other devices on the same port or
+	 * timeout updates for the same device, we have to pull
+	 * ATA_EH_PARK actions from eh_info into eh_context.i
+	 * ourselves at the beginning of each pass over the loop.
+	 *
+	 * Additionally, all write accesses to &ap->park_req_pending
+	 * through INIT_COMPLETION() (see below) or complete_all()
+	 * (see ata_scsi_park_store()) are protected by the host lock.
+	 * As a result we have that park_req_pending.done is zero on
+	 * exit from this function, i.e. when ATA_EH_PARK actions for
+	 * *all* devices on port ap have been pulled into the
+	 * respective eh_context structs. If, and only if,
+	 * park_req_pending.done is non-zero by the time we reach
+	 * wait_for_completion_timeout(), another ATA_EH_PARK action
+	 * has been scheduled for at least one of the devices on port
+	 * ap and we have to cycle over the do {} while () loop in
+	 * ata_eh_recover() again.
+	 */
+
+	spin_lock_irqsave(ap->lock, flags);
+	INIT_COMPLETION(ap->park_req_pending);
+	ata_port_for_each_link(link, ap) {
+		ata_link_for_each_dev(dev, link) {
+			struct ata_eh_info *ehi = &link->eh_info;
+
+			link->eh_context.i.dev_action[dev->devno] |=
+				ehi->dev_action[dev->devno] & ATA_EH_PARK;
+			ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
+		}
+	}
+	spin_unlock_irqrestore(ap->lock, flags);
+}
+
+static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
+{
+	struct ata_eh_context *ehc = &dev->link->eh_context;
+	struct ata_taskfile tf;
+	unsigned int err_mask;
+
+	ata_tf_init(dev, &tf);
+	if (park) {
+		ehc->unloaded_mask |= 1 << dev->devno;
+		tf.command = ATA_CMD_IDLEIMMEDIATE;
+		tf.feature = 0x44;
+		tf.lbal = 0x4c;
+		tf.lbam = 0x4e;
+		tf.lbah = 0x55;
+	} else {
+		ehc->unloaded_mask &= ~(1 << dev->devno);
+		tf.command = ATA_CMD_CHK_POWER;
+	}
+
+	tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
+	tf.protocol |= ATA_PROT_NODATA;
+	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+	if (park && (err_mask || tf.lbal != 0xc4)) {
+		ata_dev_printk(dev, KERN_ERR, "head unload failed!\n");
+		ehc->unloaded_mask &= ~(1 << dev->devno);
+	}
+}
+
 static int ata_eh_revalidate_and_attach(struct ata_link *link,
 					struct ata_device **r_failed_dev)
 {
@@ -2472,7 +2670,7 @@
 		if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
 			WARN_ON(dev->class == ATA_DEV_PMP);
 
-			if (ata_link_offline(link)) {
+			if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
 				rc = -EIO;
 				goto err;
 			}
@@ -2610,6 +2808,53 @@
 	return rc;
 }
 
+/**
+ *	atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
+ *	@dev: ATAPI device to clear UA for
+ *
+ *	Resets and other operations can make an ATAPI device raise
+ *	UNIT ATTENTION which causes the next operation to fail.  This
+ *	function clears UA.
+ *
+ *	LOCKING:
+ *	EH context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, -errno on failure.
+ */
+static int atapi_eh_clear_ua(struct ata_device *dev)
+{
+	int i;
+
+	for (i = 0; i < ATA_EH_UA_TRIES; i++) {
+		u8 sense_buffer[SCSI_SENSE_BUFFERSIZE];
+		u8 sense_key = 0;
+		unsigned int err_mask;
+
+		err_mask = atapi_eh_tur(dev, &sense_key);
+		if (err_mask != 0 && err_mask != AC_ERR_DEV) {
+			ata_dev_printk(dev, KERN_WARNING, "TEST_UNIT_READY "
+				"failed (err_mask=0x%x)\n", err_mask);
+			return -EIO;
+		}
+
+		if (!err_mask || sense_key != UNIT_ATTENTION)
+			return 0;
+
+		err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
+		if (err_mask) {
+			ata_dev_printk(dev, KERN_WARNING, "failed to clear "
+				"UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
+			return -EIO;
+		}
+	}
+
+	ata_dev_printk(dev, KERN_WARNING,
+		"UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES);
+
+	return 0;
+}
+
 static int ata_link_nr_enabled(struct ata_link *link)
 {
 	struct ata_device *dev;
@@ -2697,7 +2942,7 @@
 			/* This is the last chance, better to slow
 			 * down than lose it.
 			 */
-			sata_down_spd_limit(dev->link);
+			sata_down_spd_limit(ata_dev_phys_link(dev));
 			ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
 		}
 	}
@@ -2707,7 +2952,7 @@
 		ata_dev_disable(dev);
 
 		/* detach if offline */
-		if (ata_link_offline(dev->link))
+		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
 			ata_eh_detach_dev(dev);
 
 		/* schedule probe if necessary */
@@ -2755,7 +3000,7 @@
 	struct ata_device *dev;
 	int nr_failed_devs;
 	int rc;
-	unsigned long flags;
+	unsigned long flags, deadline;
 
 	DPRINTK("ENTER\n");
 
@@ -2829,6 +3074,56 @@
 		}
 	}
 
+	do {
+		unsigned long now;
+
+		/*
+		 * clears ATA_EH_PARK in eh_info and resets
+		 * ap->park_req_pending
+		 */
+		ata_eh_pull_park_action(ap);
+
+		deadline = jiffies;
+		ata_port_for_each_link(link, ap) {
+			ata_link_for_each_dev(dev, link) {
+				struct ata_eh_context *ehc = &link->eh_context;
+				unsigned long tmp;
+
+				if (dev->class != ATA_DEV_ATA)
+					continue;
+				if (!(ehc->i.dev_action[dev->devno] &
+				      ATA_EH_PARK))
+					continue;
+				tmp = dev->unpark_deadline;
+				if (time_before(deadline, tmp))
+					deadline = tmp;
+				else if (time_before_eq(tmp, jiffies))
+					continue;
+				if (ehc->unloaded_mask & (1 << dev->devno))
+					continue;
+
+				ata_eh_park_issue_cmd(dev, 1);
+			}
+		}
+
+		now = jiffies;
+		if (time_before_eq(deadline, now))
+			break;
+
+		deadline = wait_for_completion_timeout(&ap->park_req_pending,
+						       deadline - now);
+	} while (deadline);
+	ata_port_for_each_link(link, ap) {
+		ata_link_for_each_dev(dev, link) {
+			if (!(link->eh_context.unloaded_mask &
+			      (1 << dev->devno)))
+				continue;
+
+			ata_eh_park_issue_cmd(dev, 0);
+			ata_eh_done(link, dev, ATA_EH_PARK);
+		}
+	}
+
 	/* the rest */
 	ata_port_for_each_link(link, ap) {
 		struct ata_eh_context *ehc = &link->eh_context;
@@ -2852,6 +3147,20 @@
 			ehc->i.flags &= ~ATA_EHI_SETMODE;
 		}
 
+		/* If reset has been issued, clear UA to avoid
+		 * disrupting the current users of the device.
+		 */
+		if (ehc->i.flags & ATA_EHI_DID_RESET) {
+			ata_link_for_each_dev(dev, link) {
+				if (dev->class != ATA_DEV_ATAPI)
+					continue;
+				rc = atapi_eh_clear_ua(dev);
+				if (rc)
+					goto dev_fail;
+			}
+		}
+
+		/* configure link power saving */
 		if (ehc->i.action & ATA_EH_LPM)
 			ata_link_for_each_dev(dev, link)
 				ata_dev_enable_pm(dev, ap->pm_policy);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b9d3ba4..59fe051 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -183,6 +183,105 @@
 		ata_scsi_lpm_show, ata_scsi_lpm_put);
 EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy);
 
+static ssize_t ata_scsi_park_show(struct device *device,
+				  struct device_attribute *attr, char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(device);
+	struct ata_port *ap;
+	struct ata_link *link;
+	struct ata_device *dev;
+	unsigned long flags;
+	unsigned int uninitialized_var(msecs);
+	int rc = 0;
+
+	ap = ata_shost_to_port(sdev->host);
+
+	spin_lock_irqsave(ap->lock, flags);
+	dev = ata_scsi_find_dev(ap, sdev);
+	if (!dev) {
+		rc = -ENODEV;
+		goto unlock;
+	}
+	if (dev->flags & ATA_DFLAG_NO_UNLOAD) {
+		rc = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	link = dev->link;
+	if (ap->pflags & ATA_PFLAG_EH_IN_PROGRESS &&
+	    link->eh_context.unloaded_mask & (1 << dev->devno) &&
+	    time_after(dev->unpark_deadline, jiffies))
+		msecs = jiffies_to_msecs(dev->unpark_deadline - jiffies);
+	else
+		msecs = 0;
+
+unlock:
+	spin_unlock_irq(ap->lock);
+
+	return rc ? rc : snprintf(buf, 20, "%u\n", msecs);
+}
+
+static ssize_t ata_scsi_park_store(struct device *device,
+				   struct device_attribute *attr,
+				   const char *buf, size_t len)
+{
+	struct scsi_device *sdev = to_scsi_device(device);
+	struct ata_port *ap;
+	struct ata_device *dev;
+	long int input;
+	unsigned long flags;
+	int rc;
+
+	rc = strict_strtol(buf, 10, &input);
+	if (rc || input < -2)
+		return -EINVAL;
+	if (input > ATA_TMOUT_MAX_PARK) {
+		rc = -EOVERFLOW;
+		input = ATA_TMOUT_MAX_PARK;
+	}
+
+	ap = ata_shost_to_port(sdev->host);
+
+	spin_lock_irqsave(ap->lock, flags);
+	dev = ata_scsi_find_dev(ap, sdev);
+	if (unlikely(!dev)) {
+		rc = -ENODEV;
+		goto unlock;
+	}
+	if (dev->class != ATA_DEV_ATA) {
+		rc = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	if (input >= 0) {
+		if (dev->flags & ATA_DFLAG_NO_UNLOAD) {
+			rc = -EOPNOTSUPP;
+			goto unlock;
+		}
+
+		dev->unpark_deadline = ata_deadline(jiffies, input);
+		dev->link->eh_info.dev_action[dev->devno] |= ATA_EH_PARK;
+		ata_port_schedule_eh(ap);
+		complete(&ap->park_req_pending);
+	} else {
+		switch (input) {
+		case -1:
+			dev->flags &= ~ATA_DFLAG_NO_UNLOAD;
+			break;
+		case -2:
+			dev->flags |= ATA_DFLAG_NO_UNLOAD;
+			break;
+		}
+	}
+unlock:
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	return rc ? rc : len;
+}
+DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR,
+	    ata_scsi_park_show, ata_scsi_park_store);
+EXPORT_SYMBOL_GPL(dev_attr_unload_heads);
+
 static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
 {
 	cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
@@ -269,6 +368,12 @@
 			ata_scsi_activity_store);
 EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
 
+struct device_attribute *ata_common_sdev_attrs[] = {
+	&dev_attr_unload_heads,
+	NULL
+};
+EXPORT_SYMBOL_GPL(ata_common_sdev_attrs);
+
 static void ata_scsi_invalid_field(struct scsi_cmnd *cmd,
 				   void (*done)(struct scsi_cmnd *))
 {
@@ -954,6 +1059,9 @@
 static int ata_scsi_dev_config(struct scsi_device *sdev,
 			       struct ata_device *dev)
 {
+	if (!ata_id_has_unload(dev->id))
+		dev->flags |= ATA_DFLAG_NO_UNLOAD;
+
 	/* configure max sectors */
 	blk_queue_max_sectors(sdev->request_queue, dev->max_sectors);
 
@@ -977,6 +1085,10 @@
 
 		blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN);
 	} else {
+		if (ata_id_is_ssd(dev->id))
+			queue_flag_set_unlocked(QUEUE_FLAG_NONROT,
+						sdev->request_queue);
+
 		/* ATA devices must be sector aligned */
 		blk_queue_update_dma_alignment(sdev->request_queue,
 					       ATA_SECT_SIZE - 1);
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index ade5c75..fe2839e 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -70,6 +70,7 @@
 extern int libata_fua;
 extern int libata_noacpi;
 extern int libata_allow_tpm;
+extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
 extern void ata_force_cbl(struct ata_port *ap);
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
@@ -107,6 +108,8 @@
 extern void __ata_qc_complete(struct ata_queued_cmd *qc);
 extern int atapi_check_dma(struct ata_queued_cmd *qc);
 extern void swap_buf_le16(u16 *buf, unsigned int buf_words);
+extern bool ata_phys_link_online(struct ata_link *link);
+extern bool ata_phys_link_offline(struct ata_link *link);
 extern void ata_dev_init(struct ata_device *dev);
 extern void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp);
 extern int sata_link_init_spd(struct ata_link *link);
@@ -152,7 +155,7 @@
 /* libata-eh.c */
 extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
 extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
-extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
+extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
 extern void ata_scsi_error(struct Scsi_Host *host);
 extern void ata_port_wait_eh(struct ata_port *ap);
 extern void ata_eh_fastdrain_timerfn(unsigned long arg);
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index d393290..1266924 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -1632,6 +1632,8 @@
 		return -ENODEV;
 	}
 
+	dev_set_drvdata(&pdev->dev, host);
+
 	return 0;
 }
 
@@ -1648,6 +1650,7 @@
 	struct ata_host *host = dev_get_drvdata(dev);
 
 	ata_host_detach(host);
+	dev_set_drvdata(&pdev->dev, NULL);
 
 	peripheral_free_list(atapi_io_port);
 
@@ -1655,27 +1658,44 @@
 }
 
 #ifdef CONFIG_PM
-int bfin_atapi_suspend(struct platform_device *pdev, pm_message_t state)
+static int bfin_atapi_suspend(struct platform_device *pdev, pm_message_t state)
 {
-	return 0;
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	if (host)
+		return ata_host_suspend(host, state);
+	else
+		return 0;
 }
 
-int bfin_atapi_resume(struct platform_device *pdev)
+static int bfin_atapi_resume(struct platform_device *pdev)
 {
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	int ret;
+
+	if (host) {
+		ret = bfin_reset_controller(host);
+		if (ret) {
+			printk(KERN_ERR DRV_NAME ": Error during HW init\n");
+			return ret;
+		}
+		ata_host_resume(host);
+	}
+
 	return 0;
 }
+#else
+#define bfin_atapi_suspend NULL
+#define bfin_atapi_resume NULL
 #endif
 
 static struct platform_driver bfin_atapi_driver = {
 	.probe			= bfin_atapi_probe,
 	.remove			= __devexit_p(bfin_atapi_remove),
+	.suspend		= bfin_atapi_suspend,
+	.resume			= bfin_atapi_resume,
 	.driver = {
 		.name		= DRV_NAME,
 		.owner		= THIS_MODULE,
-#ifdef CONFIG_PM
-		.suspend	= bfin_atapi_suspend,
-		.resume		= bfin_atapi_resume,
-#endif
 	},
 };
 
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index e970b22..a598bb3 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -230,7 +230,7 @@
 		tmpbyte & 1, tmpbyte & 0x30);
 
 	*try_mmio = 0;
-#ifdef CONFIG_PPC_MERGE
+#ifdef CONFIG_PPC
 	if (machine_is(cell))
 		*try_mmio = (tmpbyte & 1) || pci_resource_start(pdev, 5);
 #endif
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 3924e72..1a56db9 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -469,10 +469,10 @@
 	return true;
 }
 
-static int sata_fsl_scr_write(struct ata_port *ap, unsigned int sc_reg_in,
-			       u32 val)
+static int sata_fsl_scr_write(struct ata_link *link,
+			      unsigned int sc_reg_in, u32 val)
 {
-	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
+	struct sata_fsl_host_priv *host_priv = link->ap->host->private_data;
 	void __iomem *ssr_base = host_priv->ssr_base;
 	unsigned int sc_reg;
 
@@ -493,10 +493,10 @@
 	return 0;
 }
 
-static int sata_fsl_scr_read(struct ata_port *ap, unsigned int sc_reg_in,
-			u32 *val)
+static int sata_fsl_scr_read(struct ata_link *link,
+			     unsigned int sc_reg_in, u32 *val)
 {
-	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
+	struct sata_fsl_host_priv *host_priv = link->ap->host->private_data;
 	void __iomem *ssr_base = host_priv->ssr_base;
 	unsigned int sc_reg;
 
@@ -645,12 +645,12 @@
 	 * Workaround for 8315DS board 3gbps link-up issue,
 	 * currently limit SATA port to GEN1 speed
 	 */
-	sata_fsl_scr_read(ap, SCR_CONTROL, &temp);
+	sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp);
 	temp &= ~(0xF << 4);
 	temp |= (0x1 << 4);
-	sata_fsl_scr_write(ap, SCR_CONTROL, temp);
+	sata_fsl_scr_write(&ap->link, SCR_CONTROL, temp);
 
-	sata_fsl_scr_read(ap, SCR_CONTROL, &temp);
+	sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp);
 	dev_printk(KERN_WARNING, dev, "scr_control, speed limited to %x\n",
 			temp);
 #endif
@@ -868,7 +868,7 @@
 			ioread32(CQ + hcr_base),
 			ioread32(CA + hcr_base), ioread32(CC + hcr_base));
 
-		sata_fsl_scr_read(ap, SCR_ERROR, &Serror);
+		sata_fsl_scr_read(&ap->link, SCR_ERROR, &Serror);
 
 		DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
 		DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
@@ -972,9 +972,9 @@
 	 * Handle & Clear SError
 	 */
 
-	sata_fsl_scr_read(ap, SCR_ERROR, &SError);
+	sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError);
 	if (unlikely(SError & 0xFFFF0000)) {
-		sata_fsl_scr_write(ap, SCR_ERROR, SError);
+		sata_fsl_scr_write(&ap->link, SCR_ERROR, SError);
 	}
 
 	DPRINTK("error_intr,hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n",
@@ -1091,7 +1091,7 @@
 
 	hstatus = ioread32(hcr_base + HSTATUS);
 
-	sata_fsl_scr_read(ap, SCR_ERROR, &SError);
+	sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError);
 
 	if (unlikely(SError & 0xFFFF0000)) {
 		DPRINTK("serror @host_intr : 0x%x\n", SError);
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 5032c32..fbbd87c 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -269,9 +269,9 @@
 	writeb(0xff, port_base + PORT_IRQ_STAT);
 }
 
-static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val)
+static int inic_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val)
 {
-	void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR;
+	void __iomem *scr_addr = inic_port_base(link->ap) + PORT_SCR;
 	void __iomem *addr;
 
 	if (unlikely(sc_reg >= ARRAY_SIZE(scr_map)))
@@ -286,9 +286,9 @@
 	return 0;
 }
 
-static int inic_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val)
+static int inic_scr_write(struct ata_link *link, unsigned sc_reg, u32 val)
 {
-	void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR;
+	void __iomem *scr_addr = inic_port_base(link->ap) + PORT_SCR;
 
 	if (unlikely(sc_reg >= ARRAY_SIZE(scr_map)))
 		return -EINVAL;
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index c815f8e..2b24ae5 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -493,10 +493,10 @@
 	void (*reset_bus)(struct ata_host *host, void __iomem *mmio);
 };
 
-static int mv_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val);
-static int mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val);
-static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val);
-static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val);
+static int mv_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val);
+static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val);
+static int mv5_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val);
+static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val);
 static int mv_port_start(struct ata_port *ap);
 static void mv_port_stop(struct ata_port *ap);
 static int mv_qc_defer(struct ata_queued_cmd *qc);
@@ -1070,23 +1070,23 @@
 	return ofs;
 }
 
-static int mv_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
+static int mv_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val)
 {
 	unsigned int ofs = mv_scr_offset(sc_reg_in);
 
 	if (ofs != 0xffffffffU) {
-		*val = readl(mv_ap_base(ap) + ofs);
+		*val = readl(mv_ap_base(link->ap) + ofs);
 		return 0;
 	} else
 		return -EINVAL;
 }
 
-static int mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val)
+static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val)
 {
 	unsigned int ofs = mv_scr_offset(sc_reg_in);
 
 	if (ofs != 0xffffffffU) {
-		writelfl(val, mv_ap_base(ap) + ofs);
+		writelfl(val, mv_ap_base(link->ap) + ofs);
 		return 0;
 	} else
 		return -EINVAL;
@@ -2251,11 +2251,11 @@
 	return ofs;
 }
 
-static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
+static int mv5_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val)
 {
-	struct mv_host_priv *hpriv = ap->host->private_data;
+	struct mv_host_priv *hpriv = link->ap->host->private_data;
 	void __iomem *mmio = hpriv->base;
-	void __iomem *addr = mv5_phy_base(mmio, ap->port_no);
+	void __iomem *addr = mv5_phy_base(mmio, link->ap->port_no);
 	unsigned int ofs = mv5_scr_offset(sc_reg_in);
 
 	if (ofs != 0xffffffffU) {
@@ -2265,11 +2265,11 @@
 		return -EINVAL;
 }
 
-static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val)
+static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val)
 {
-	struct mv_host_priv *hpriv = ap->host->private_data;
+	struct mv_host_priv *hpriv = link->ap->host->private_data;
 	void __iomem *mmio = hpriv->base;
-	void __iomem *addr = mv5_phy_base(mmio, ap->port_no);
+	void __iomem *addr = mv5_phy_base(mmio, link->ap->port_no);
 	unsigned int ofs = mv5_scr_offset(sc_reg_in);
 
 	if (ofs != 0xffffffffU) {
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 14601dc..fae3841 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -302,8 +302,8 @@
 static irqreturn_t nv_generic_interrupt(int irq, void *dev_instance);
 static irqreturn_t nv_nf2_interrupt(int irq, void *dev_instance);
 static irqreturn_t nv_ck804_interrupt(int irq, void *dev_instance);
-static int nv_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int nv_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 
 static void nv_nf2_freeze(struct ata_port *ap);
 static void nv_nf2_thaw(struct ata_port *ap);
@@ -1511,21 +1511,21 @@
 	return ret;
 }
 
-static int nv_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
 
-	*val = ioread32(ap->ioaddr.scr_addr + (sc_reg * 4));
+	*val = ioread32(link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
-static int nv_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
 
-	iowrite32(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+	iowrite32(val, link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
@@ -2218,9 +2218,9 @@
 	if (!pp->qc_active)
 		return;
 
-	if (ap->ops->scr_read(ap, SCR_ERROR, &serror))
+	if (ap->ops->scr_read(&ap->link, SCR_ERROR, &serror))
 		return;
-	ap->ops->scr_write(ap, SCR_ERROR, serror);
+	ap->ops->scr_write(&ap->link, SCR_ERROR, serror);
 
 	if (ata_stat & ATA_ERR) {
 		ata_ehi_clear_desc(ehi);
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 030665b..750d8cd 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -137,8 +137,8 @@
 	dma_addr_t		pkt_dma;
 };
 
-static int pdc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int pdc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int pdc_sata_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int pdc_sata_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int pdc_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static int pdc_common_port_start(struct ata_port *ap);
 static int pdc_sata_port_start(struct ata_port *ap);
@@ -386,19 +386,21 @@
 	return ATA_CBL_SATA;
 }
 
-static int pdc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int pdc_sata_scr_read(struct ata_link *link,
+			     unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = readl(ap->ioaddr.scr_addr + (sc_reg * 4));
+	*val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
-static int pdc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int pdc_sata_scr_write(struct ata_link *link,
+			      unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	writel(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+	writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
@@ -731,7 +733,7 @@
 	if (sata_scr_valid(&ap->link)) {
 		u32 serror;
 
-		pdc_sata_scr_read(ap, SCR_ERROR, &serror);
+		pdc_sata_scr_read(&ap->link, SCR_ERROR, &serror);
 		ehi->serror |= serror;
 	}
 
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index 1600107..a000c86 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -111,8 +111,8 @@
 	qs_state_t		state;
 };
 
-static int qs_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int qs_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int qs_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int qs_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static int qs_port_start(struct ata_port *ap);
 static void qs_host_stop(struct ata_host *host);
@@ -242,11 +242,11 @@
 	return ata_sff_prereset(link, deadline);
 }
 
-static int qs_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int qs_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = readl(ap->ioaddr.scr_addr + (sc_reg * 8));
+	*val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 8));
 	return 0;
 }
 
@@ -256,11 +256,11 @@
 	ata_std_error_handler(ap);
 }
 
-static int qs_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	writel(val, ap->ioaddr.scr_addr + (sc_reg * 8));
+	writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 8));
 	return 0;
 }
 
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 88bf421..031d7b7 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -115,8 +115,8 @@
 static int sil_pci_device_resume(struct pci_dev *pdev);
 #endif
 static void sil_dev_config(struct ata_device *dev);
-static int sil_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int sil_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int sil_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int sil_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int sil_set_mode(struct ata_link *link, struct ata_device **r_failed);
 static void sil_freeze(struct ata_port *ap);
 static void sil_thaw(struct ata_port *ap);
@@ -317,9 +317,9 @@
 	return NULL;
 }
 
-static int sil_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int sil_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
-	void __iomem *mmio = sil_scr_addr(ap, sc_reg);
+	void __iomem *mmio = sil_scr_addr(link->ap, sc_reg);
 
 	if (mmio) {
 		*val = readl(mmio);
@@ -328,9 +328,9 @@
 	return -EINVAL;
 }
 
-static int sil_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int sil_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
-	void __iomem *mmio = sil_scr_addr(ap, sc_reg);
+	void __iomem *mmio = sil_scr_addr(link->ap, sc_reg);
 
 	if (mmio) {
 		writel(val, mmio);
@@ -352,8 +352,8 @@
 		 * controllers continue to assert IRQ as long as
 		 * SError bits are pending.  Clear SError immediately.
 		 */
-		sil_scr_read(ap, SCR_ERROR, &serror);
-		sil_scr_write(ap, SCR_ERROR, serror);
+		sil_scr_read(&ap->link, SCR_ERROR, &serror);
+		sil_scr_write(&ap->link, SCR_ERROR, serror);
 
 		/* Sometimes spurious interrupts occur, double check
 		 * it's PHYRDY CHG.
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 84ffcc2..4621807 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -340,8 +340,8 @@
 };
 
 static void sil24_dev_config(struct ata_device *dev);
-static int sil24_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val);
-static int sil24_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val);
+static int sil24_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val);
+static int sil24_scr_write(struct ata_link *link, unsigned sc_reg, u32 val);
 static int sil24_qc_defer(struct ata_queued_cmd *qc);
 static void sil24_qc_prep(struct ata_queued_cmd *qc);
 static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc);
@@ -504,9 +504,9 @@
 	[SCR_ACTIVE]	= 3,
 };
 
-static int sil24_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val)
+static int sil24_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val)
 {
-	void __iomem *scr_addr = sil24_port_base(ap) + PORT_SCONTROL;
+	void __iomem *scr_addr = sil24_port_base(link->ap) + PORT_SCONTROL;
 
 	if (sc_reg < ARRAY_SIZE(sil24_scr_map)) {
 		void __iomem *addr;
@@ -517,9 +517,9 @@
 	return -EINVAL;
 }
 
-static int sil24_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val)
+static int sil24_scr_write(struct ata_link *link, unsigned sc_reg, u32 val)
 {
-	void __iomem *scr_addr = sil24_port_base(ap) + PORT_SCONTROL;
+	void __iomem *scr_addr = sil24_port_base(link->ap) + PORT_SCONTROL;
 
 	if (sc_reg < ARRAY_SIZE(sil24_scr_map)) {
 		void __iomem *addr;
diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index 1010b30..9c43b4e 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c
@@ -64,8 +64,8 @@
 };
 
 static int sis_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-static int sis_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int sis_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int sis_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int sis_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 
 static const struct pci_device_id sis_pci_tbl[] = {
 	{ PCI_VDEVICE(SI, 0x0180), sis_180 },	/* SiS 964/180 */
@@ -134,10 +134,11 @@
 	return addr;
 }
 
-static u32 sis_scr_cfg_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static u32 sis_scr_cfg_read(struct ata_link *link,
+			    unsigned int sc_reg, u32 *val)
 {
-	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-	unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg);
+	struct pci_dev *pdev = to_pci_dev(link->ap->host->dev);
+	unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg);
 	u32 val2 = 0;
 	u8 pmr;
 
@@ -158,10 +159,11 @@
 	return 0;
 }
 
-static int sis_scr_cfg_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int sis_scr_cfg_write(struct ata_link *link,
+			     unsigned int sc_reg, u32 val)
 {
-	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-	unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg);
+	struct pci_dev *pdev = to_pci_dev(link->ap->host->dev);
+	unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg);
 	u8 pmr;
 
 	if (sc_reg == SCR_ERROR) /* doesn't exist in PCI cfg space */
@@ -178,8 +180,9 @@
 	return 0;
 }
 
-static int sis_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int sis_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
+	struct ata_port *ap = link->ap;
 	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
 	u8 pmr;
 
@@ -187,7 +190,7 @@
 		return -EINVAL;
 
 	if (ap->flags & SIS_FLAG_CFGSCR)
-		return sis_scr_cfg_read(ap, sc_reg, val);
+		return sis_scr_cfg_read(link, sc_reg, val);
 
 	pci_read_config_byte(pdev, SIS_PMR, &pmr);
 
@@ -202,8 +205,9 @@
 	return 0;
 }
 
-static int sis_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int sis_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
+	struct ata_port *ap = link->ap;
 	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
 	u8 pmr;
 
@@ -213,7 +217,7 @@
 	pci_read_config_byte(pdev, SIS_PMR, &pmr);
 
 	if (ap->flags & SIS_FLAG_CFGSCR)
-		return sis_scr_cfg_write(ap, sc_reg, val);
+		return sis_scr_cfg_write(link, sc_reg, val);
 	else {
 		iowrite32(val, ap->ioaddr.scr_addr + (sc_reg * 4));
 		if ((pdev->device == 0x0182) || (pdev->device == 0x0183) ||
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index fb13b82..609d147 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -123,20 +123,22 @@
 	}
 }
 
-static int k2_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int k2_sata_scr_read(struct ata_link *link,
+			    unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = readl(ap->ioaddr.scr_addr + (sc_reg * 4));
+	*val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
 
-static int k2_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int k2_sata_scr_write(struct ata_link *link,
+			     unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	writel(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+	writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index db529b8..019575b 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -57,8 +57,8 @@
 };
 
 static int uli_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-static int uli_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int uli_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int uli_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int uli_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 
 static const struct pci_device_id uli_pci_tbl[] = {
 	{ PCI_VDEVICE(AL, 0x5289), uli_5289 },
@@ -107,39 +107,39 @@
 	return hpriv->scr_cfg_addr[ap->port_no] + (4 * sc_reg);
 }
 
-static u32 uli_scr_cfg_read(struct ata_port *ap, unsigned int sc_reg)
+static u32 uli_scr_cfg_read(struct ata_link *link, unsigned int sc_reg)
 {
-	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-	unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg);
+	struct pci_dev *pdev = to_pci_dev(link->ap->host->dev);
+	unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg);
 	u32 val;
 
 	pci_read_config_dword(pdev, cfg_addr, &val);
 	return val;
 }
 
-static void uli_scr_cfg_write(struct ata_port *ap, unsigned int scr, u32 val)
+static void uli_scr_cfg_write(struct ata_link *link, unsigned int scr, u32 val)
 {
-	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-	unsigned int cfg_addr = get_scr_cfg_addr(ap, scr);
+	struct pci_dev *pdev = to_pci_dev(link->ap->host->dev);
+	unsigned int cfg_addr = get_scr_cfg_addr(link->ap, scr);
 
 	pci_write_config_dword(pdev, cfg_addr, val);
 }
 
-static int uli_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int uli_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
 
-	*val = uli_scr_cfg_read(ap, sc_reg);
+	*val = uli_scr_cfg_read(link, sc_reg);
 	return 0;
 }
 
-static int uli_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int uli_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL) //SCR_CONTROL=2, SCR_ERROR=1, SCR_STATUS=0
 		return -EINVAL;
 
-	uli_scr_cfg_write(ap, sc_reg, val);
+	uli_scr_cfg_write(link, sc_reg, val);
 	return 0;
 }
 
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 96deeb3..1cfa745 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -68,8 +68,8 @@
 };
 
 static int svia_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-static int svia_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int svia_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int svia_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int svia_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static void svia_noop_freeze(struct ata_port *ap);
 static int vt6420_prereset(struct ata_link *link, unsigned long deadline);
 static int vt6421_pata_cable_detect(struct ata_port *ap);
@@ -152,19 +152,19 @@
 MODULE_DEVICE_TABLE(pci, svia_pci_tbl);
 MODULE_VERSION(DRV_VERSION);
 
-static int svia_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int svia_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = ioread32(ap->ioaddr.scr_addr + (4 * sc_reg));
+	*val = ioread32(link->ap->ioaddr.scr_addr + (4 * sc_reg));
 	return 0;
 }
 
-static int svia_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int svia_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	iowrite32(val, ap->ioaddr.scr_addr + (4 * sc_reg));
+	iowrite32(val, link->ap->ioaddr.scr_addr + (4 * sc_reg));
 	return 0;
 }
 
@@ -210,20 +210,20 @@
 		goto skip_scr;
 
 	/* Resume phy.  This is the old SATA resume sequence */
-	svia_scr_write(ap, SCR_CONTROL, 0x300);
-	svia_scr_read(ap, SCR_CONTROL, &scontrol); /* flush */
+	svia_scr_write(link, SCR_CONTROL, 0x300);
+	svia_scr_read(link, SCR_CONTROL, &scontrol); /* flush */
 
 	/* wait for phy to become ready, if necessary */
 	do {
 		msleep(200);
-		svia_scr_read(ap, SCR_STATUS, &sstatus);
+		svia_scr_read(link, SCR_STATUS, &sstatus);
 		if ((sstatus & 0xf) != 1)
 			break;
 	} while (time_before(jiffies, timeout));
 
 	/* open code sata_print_link_status() */
-	svia_scr_read(ap, SCR_STATUS, &sstatus);
-	svia_scr_read(ap, SCR_CONTROL, &scontrol);
+	svia_scr_read(link, SCR_STATUS, &sstatus);
+	svia_scr_read(link, SCR_CONTROL, &scontrol);
 
 	online = (sstatus & 0xf) == 0x3;
 
@@ -232,7 +232,7 @@
 			online ? "up" : "down", sstatus, scontrol);
 
 	/* SStatus is read one more time */
-	svia_scr_read(ap, SCR_STATUS, &sstatus);
+	svia_scr_read(link, SCR_STATUS, &sstatus);
 
 	if (!online) {
 		/* tell EH to bail */
diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
index f3d635c..c57cdff 100644
--- a/drivers/ata/sata_vsc.c
+++ b/drivers/ata/sata_vsc.c
@@ -98,20 +98,22 @@
 			      VSC_SATA_INT_PHY_CHANGE),
 };
 
-static int vsc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int vsc_sata_scr_read(struct ata_link *link,
+			     unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = readl(ap->ioaddr.scr_addr + (sc_reg * 4));
+	*val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
 
-static int vsc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int vsc_sata_scr_write(struct ata_link *link,
+			      unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	writel(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+	writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 31dc0cd..0a5f055 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -54,7 +54,7 @@
  */
 struct class_private {
 	struct kset class_subsys;
-	struct list_head class_devices;
+	struct klist class_devices;
 	struct list_head class_interfaces;
 	struct kset class_dirs;
 	struct mutex class_mutex;
diff --git a/drivers/base/class.c b/drivers/base/class.c
index cc5e28c..eb85e43 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -135,6 +135,20 @@
 	}
 }
 
+static void klist_class_dev_get(struct klist_node *n)
+{
+	struct device *dev = container_of(n, struct device, knode_class);
+
+	get_device(dev);
+}
+
+static void klist_class_dev_put(struct klist_node *n)
+{
+	struct device *dev = container_of(n, struct device, knode_class);
+
+	put_device(dev);
+}
+
 int __class_register(struct class *cls, struct lock_class_key *key)
 {
 	struct class_private *cp;
@@ -145,7 +159,7 @@
 	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
-	INIT_LIST_HEAD(&cp->class_devices);
+	klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put);
 	INIT_LIST_HEAD(&cp->class_interfaces);
 	kset_init(&cp->class_dirs);
 	__mutex_init(&cp->class_mutex, "struct class mutex", key);
@@ -269,6 +283,71 @@
 #endif
 
 /**
+ * class_dev_iter_init - initialize class device iterator
+ * @iter: class iterator to initialize
+ * @class: the class we wanna iterate over
+ * @start: the device to start iterating from, if any
+ * @type: device_type of the devices to iterate over, NULL for all
+ *
+ * Initialize class iterator @iter such that it iterates over devices
+ * of @class.  If @start is set, the list iteration will start there,
+ * otherwise if it is NULL, the iteration starts at the beginning of
+ * the list.
+ */
+void class_dev_iter_init(struct class_dev_iter *iter, struct class *class,
+			 struct device *start, const struct device_type *type)
+{
+	struct klist_node *start_knode = NULL;
+
+	if (start)
+		start_knode = &start->knode_class;
+	klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode);
+	iter->type = type;
+}
+EXPORT_SYMBOL_GPL(class_dev_iter_init);
+
+/**
+ * class_dev_iter_next - iterate to the next device
+ * @iter: class iterator to proceed
+ *
+ * Proceed @iter to the next device and return it.  Returns NULL if
+ * iteration is complete.
+ *
+ * The returned device is referenced and won't be released till
+ * iterator is proceed to the next device or exited.  The caller is
+ * free to do whatever it wants to do with the device including
+ * calling back into class code.
+ */
+struct device *class_dev_iter_next(struct class_dev_iter *iter)
+{
+	struct klist_node *knode;
+	struct device *dev;
+
+	while (1) {
+		knode = klist_next(&iter->ki);
+		if (!knode)
+			return NULL;
+		dev = container_of(knode, struct device, knode_class);
+		if (!iter->type || iter->type == dev->type)
+			return dev;
+	}
+}
+EXPORT_SYMBOL_GPL(class_dev_iter_next);
+
+/**
+ * class_dev_iter_exit - finish iteration
+ * @iter: class iterator to finish
+ *
+ * Finish an iteration.  Always call this function after iteration is
+ * complete whether the iteration ran till the end or not.
+ */
+void class_dev_iter_exit(struct class_dev_iter *iter)
+{
+	klist_iter_exit(&iter->ki);
+}
+EXPORT_SYMBOL_GPL(class_dev_iter_exit);
+
+/**
  * class_for_each_device - device iterator
  * @class: the class we're iterating
  * @start: the device to start with in the list, if any.
@@ -283,13 +362,13 @@
  * We check the return of @fn each time. If it returns anything
  * other than 0, we break out and return that value.
  *
- * Note, we hold class->class_mutex in this function, so it can not be
- * re-acquired in @fn, otherwise it will self-deadlocking. For
- * example, calls to add or remove class members would be verboten.
+ * @fn is allowed to do anything including calling back into class
+ * code.  There's no locking restriction.
  */
 int class_for_each_device(struct class *class, struct device *start,
 			  void *data, int (*fn)(struct device *, void *))
 {
+	struct class_dev_iter iter;
 	struct device *dev;
 	int error = 0;
 
@@ -301,20 +380,13 @@
 		return -EINVAL;
 	}
 
-	mutex_lock(&class->p->class_mutex);
-	list_for_each_entry(dev, &class->p->class_devices, node) {
-		if (start) {
-			if (start == dev)
-				start = NULL;
-			continue;
-		}
-		dev = get_device(dev);
+	class_dev_iter_init(&iter, class, start, NULL);
+	while ((dev = class_dev_iter_next(&iter))) {
 		error = fn(dev, data);
-		put_device(dev);
 		if (error)
 			break;
 	}
-	mutex_unlock(&class->p->class_mutex);
+	class_dev_iter_exit(&iter);
 
 	return error;
 }
@@ -337,16 +409,15 @@
  *
  * Note, you will need to drop the reference with put_device() after use.
  *
- * We hold class->class_mutex in this function, so it can not be
- * re-acquired in @match, otherwise it will self-deadlocking. For
- * example, calls to add or remove class members would be verboten.
+ * @fn is allowed to do anything including calling back into class
+ * code.  There's no locking restriction.
  */
 struct device *class_find_device(struct class *class, struct device *start,
 				 void *data,
 				 int (*match)(struct device *, void *))
 {
+	struct class_dev_iter iter;
 	struct device *dev;
-	int found = 0;
 
 	if (!class)
 		return NULL;
@@ -356,29 +427,23 @@
 		return NULL;
 	}
 
-	mutex_lock(&class->p->class_mutex);
-	list_for_each_entry(dev, &class->p->class_devices, node) {
-		if (start) {
-			if (start == dev)
-				start = NULL;
-			continue;
-		}
-		dev = get_device(dev);
+	class_dev_iter_init(&iter, class, start, NULL);
+	while ((dev = class_dev_iter_next(&iter))) {
 		if (match(dev, data)) {
-			found = 1;
+			get_device(dev);
 			break;
-		} else
-			put_device(dev);
+		}
 	}
-	mutex_unlock(&class->p->class_mutex);
+	class_dev_iter_exit(&iter);
 
-	return found ? dev : NULL;
+	return dev;
 }
 EXPORT_SYMBOL_GPL(class_find_device);
 
 int class_interface_register(struct class_interface *class_intf)
 {
 	struct class *parent;
+	struct class_dev_iter iter;
 	struct device *dev;
 
 	if (!class_intf || !class_intf->class)
@@ -391,8 +456,10 @@
 	mutex_lock(&parent->p->class_mutex);
 	list_add_tail(&class_intf->node, &parent->p->class_interfaces);
 	if (class_intf->add_dev) {
-		list_for_each_entry(dev, &parent->p->class_devices, node)
+		class_dev_iter_init(&iter, parent, NULL, NULL);
+		while ((dev = class_dev_iter_next(&iter)))
 			class_intf->add_dev(dev, class_intf);
+		class_dev_iter_exit(&iter);
 	}
 	mutex_unlock(&parent->p->class_mutex);
 
@@ -402,6 +469,7 @@
 void class_interface_unregister(struct class_interface *class_intf)
 {
 	struct class *parent = class_intf->class;
+	struct class_dev_iter iter;
 	struct device *dev;
 
 	if (!parent)
@@ -410,8 +478,10 @@
 	mutex_lock(&parent->p->class_mutex);
 	list_del_init(&class_intf->node);
 	if (class_intf->remove_dev) {
-		list_for_each_entry(dev, &parent->p->class_devices, node)
+		class_dev_iter_init(&iter, parent, NULL, NULL);
+		while ((dev = class_dev_iter_next(&iter)))
 			class_intf->remove_dev(dev, class_intf);
+		class_dev_iter_exit(&iter);
 	}
 	mutex_unlock(&parent->p->class_mutex);
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d021c98..b98cb14 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -536,7 +536,6 @@
 	klist_init(&dev->klist_children, klist_children_get,
 		   klist_children_put);
 	INIT_LIST_HEAD(&dev->dma_pools);
-	INIT_LIST_HEAD(&dev->node);
 	init_MUTEX(&dev->sem);
 	spin_lock_init(&dev->devres_lock);
 	INIT_LIST_HEAD(&dev->devres_head);
@@ -916,7 +915,8 @@
 	if (dev->class) {
 		mutex_lock(&dev->class->p->class_mutex);
 		/* tie the class to the device */
-		list_add_tail(&dev->node, &dev->class->p->class_devices);
+		klist_add_tail(&dev->knode_class,
+			       &dev->class->p->class_devices);
 
 		/* notify any interfaces that the device is here */
 		list_for_each_entry(class_intf,
@@ -1032,7 +1032,7 @@
 			if (class_intf->remove_dev)
 				class_intf->remove_dev(dev, class_intf);
 		/* remove the device from the class list */
-		list_del_init(&dev->node);
+		klist_del(&dev->knode_class);
 		mutex_unlock(&dev->class->p->class_mutex);
 	}
 	device_remove_file(dev, &uevent_attr);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 0c39782..aa69556 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -109,12 +109,12 @@
 static int
 aoedisk_add_sysfs(struct aoedev *d)
 {
-	return sysfs_create_group(&d->gd->dev.kobj, &attr_group);
+	return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
 }
 void
 aoedisk_rm_sysfs(struct aoedev *d)
 {
-	sysfs_remove_group(&d->gd->dev.kobj, &attr_group);
+	sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
 }
 
 static int
@@ -276,7 +276,7 @@
 	gd->first_minor = d->sysminor * AOE_PARTITIONS;
 	gd->fops = &aoe_bdops;
 	gd->private_data = d;
-	gd->capacity = d->ssize;
+	set_capacity(gd, d->ssize);
 	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
 		d->aoemajor, d->aoeminor);
 
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 2f17462..961d29a 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -645,7 +645,7 @@
 		unsigned long flags;
 		u64 ssize;
 
-		ssize = d->gd->capacity;
+		ssize = get_capacity(d->gd);
 		bd = bdget_disk(d->gd, 0);
 
 		if (bd) {
@@ -707,7 +707,7 @@
 	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
 		return;
 	if (d->gd != NULL) {
-		d->gd->capacity = ssize;
+		set_capacity(d->gd, ssize);
 		d->flags |= DEVFL_NEWSIZE;
 	} else
 		d->flags |= DEVFL_GDALLOC;
@@ -756,12 +756,17 @@
 	unsigned long n_sect = bio->bi_size >> 9;
 	const int rw = bio_data_dir(bio);
 	struct hd_struct *part;
+	int cpu;
 
-	part = get_part(disk, sector);
-	all_stat_inc(disk, part, ios[rw], sector);
-	all_stat_add(disk, part, ticks[rw], duration, sector);
-	all_stat_add(disk, part, sectors[rw], n_sect, sector);
-	all_stat_add(disk, part, io_ticks, duration, sector);
+	cpu = part_stat_lock();
+	part = disk_map_sector_rcu(disk, sector);
+
+	part_stat_inc(cpu, part, ios[rw]);
+	part_stat_add(cpu, part, ticks[rw], duration);
+	part_stat_add(cpu, part, sectors[rw], n_sect);
+	part_stat_add(cpu, part, io_ticks, duration);
+
+	part_stat_unlock();
 }
 
 void
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index a1d813a..6a8038d 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -91,7 +91,7 @@
 	}
 
 	if (d->gd)
-		d->gd->capacity = 0;
+		set_capacity(d->gd, 0);
 
 	d->flags &= ~DEVFL_UP;
 }
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index b73116e..1e1f915 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3460,8 +3460,8 @@
 	       hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
 
 	hba[i]->cmd_pool_bits =
-	    kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG -
-		      1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL);
+	    kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
+			* sizeof(unsigned long), GFP_KERNEL);
 	hba[i]->cmd_pool = (CommandList_struct *)
 	    pci_alloc_consistent(hba[i]->pdev,
 		    hba[i]->nr_cmds * sizeof(CommandList_struct),
@@ -3493,8 +3493,8 @@
 	/* command and error info recs zeroed out before
 	   they are used */
 	memset(hba[i]->cmd_pool_bits, 0,
-	       ((hba[i]->nr_cmds + BITS_PER_LONG -
-		 1) / BITS_PER_LONG) * sizeof(unsigned long));
+	       DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
+			* sizeof(unsigned long));
 
 	hba[i]->num_luns = 0;
 	hba[i]->highest_lun = -1;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e1233aa..a3fd87b4 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -365,7 +365,7 @@
 
 static int 
 cciss_scsi_add_entry(int ctlr, int hostno, 
-		unsigned char *scsi3addr, int devtype,
+		struct cciss_scsi_dev_t *device,
 		struct scsi2map *added, int *nadded)
 {
 	/* assumes hba[ctlr]->scsi_ctlr->lock is held */ 
@@ -384,12 +384,12 @@
 	lun = 0;
 	/* Is this device a non-zero lun of a multi-lun device */
 	/* byte 4 of the 8-byte LUN addr will contain the logical unit no. */
-	if (scsi3addr[4] != 0) {
+	if (device->scsi3addr[4] != 0) {
 		/* Search through our list and find the device which */
 		/* has the same 8 byte LUN address, excepting byte 4. */
 		/* Assign the same bus and target for this new LUN. */
 		/* Use the logical unit number from the firmware. */
-		memcpy(addr1, scsi3addr, 8);
+		memcpy(addr1, device->scsi3addr, 8);
 		addr1[4] = 0;
 		for (i = 0; i < n; i++) {
 			sd = &ccissscsi[ctlr].dev[i];
@@ -399,7 +399,7 @@
 			if (memcmp(addr1, addr2, 8) == 0) {
 				bus = sd->bus;
 				target = sd->target;
-				lun = scsi3addr[4];
+				lun = device->scsi3addr[4];
 				break;
 			}
 		}
@@ -420,8 +420,12 @@
 	added[*nadded].lun = sd->lun;
 	(*nadded)++;
 
-	memcpy(&sd->scsi3addr[0], scsi3addr, 8);
-	sd->devtype = devtype;
+	memcpy(sd->scsi3addr, device->scsi3addr, 8);
+	memcpy(sd->vendor, device->vendor, sizeof(sd->vendor));
+	memcpy(sd->revision, device->revision, sizeof(sd->revision));
+	memcpy(sd->device_id, device->device_id, sizeof(sd->device_id));
+	sd->devtype = device->devtype;
+
 	ccissscsi[ctlr].ndevices++;
 
 	/* initially, (before registering with scsi layer) we don't 
@@ -487,6 +491,22 @@
 	CPQ_TAPE_UNLOCK(ctlr, flags);
 }
 
+static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
+	struct cciss_scsi_dev_t *dev2)
+{
+	return dev1->devtype == dev2->devtype &&
+		memcmp(dev1->scsi3addr, dev2->scsi3addr,
+			sizeof(dev1->scsi3addr)) == 0 &&
+		memcmp(dev1->device_id, dev2->device_id,
+			sizeof(dev1->device_id)) == 0 &&
+		memcmp(dev1->vendor, dev2->vendor,
+			sizeof(dev1->vendor)) == 0 &&
+		memcmp(dev1->model, dev2->model,
+			sizeof(dev1->model)) == 0 &&
+		memcmp(dev1->revision, dev2->revision,
+			sizeof(dev1->revision)) == 0;
+}
+
 static int
 adjust_cciss_scsi_table(int ctlr, int hostno,
 	struct cciss_scsi_dev_t sd[], int nsds)
@@ -532,7 +552,7 @@
 		for (j=0;j<nsds;j++) {
 			if (SCSI3ADDR_EQ(sd[j].scsi3addr,
 				csd->scsi3addr)) {
-				if (sd[j].devtype == csd->devtype)
+				if (device_is_the_same(&sd[j], csd))
 					found=2;
 				else
 					found=1;
@@ -548,22 +568,26 @@
 			cciss_scsi_remove_entry(ctlr, hostno, i,
 				removed, &nremoved);
 			/* remove ^^^, hence i not incremented */
-		} 
-		else if (found == 1) { /* device is different kind */
+		} else if (found == 1) { /* device is different in some way */
 			changes++;
-			printk("cciss%d: device c%db%dt%dl%d type changed "
-				"(device type now %s).\n",
-				ctlr, hostno, csd->bus, csd->target, csd->lun,
-					scsi_device_type(csd->devtype));
+			printk("cciss%d: device c%db%dt%dl%d has changed.\n",
+				ctlr, hostno, csd->bus, csd->target, csd->lun);
 			cciss_scsi_remove_entry(ctlr, hostno, i,
 				removed, &nremoved);
 			/* remove ^^^, hence i not incremented */
-			if (cciss_scsi_add_entry(ctlr, hostno,
-				&sd[j].scsi3addr[0], sd[j].devtype,
+			if (cciss_scsi_add_entry(ctlr, hostno, &sd[j],
 				added, &nadded) != 0)
 				/* we just removed one, so add can't fail. */
 					BUG();
 			csd->devtype = sd[j].devtype;
+			memcpy(csd->device_id, sd[j].device_id,
+				sizeof(csd->device_id));
+			memcpy(csd->vendor, sd[j].vendor,
+				sizeof(csd->vendor));
+			memcpy(csd->model, sd[j].model,
+				sizeof(csd->model));
+			memcpy(csd->revision, sd[j].revision,
+				sizeof(csd->revision));
 		} else 		/* device is same as it ever was, */
 			i++;	/* so just move along. */
 	}
@@ -577,7 +601,7 @@
 			csd = &ccissscsi[ctlr].dev[j];
 			if (SCSI3ADDR_EQ(sd[i].scsi3addr,
 				csd->scsi3addr)) {
-				if (sd[i].devtype == csd->devtype)
+				if (device_is_the_same(&sd[i], csd))
 					found=2;	/* found device */
 				else
 					found=1; 	/* found a bug. */
@@ -586,16 +610,14 @@
 		}
 		if (!found) {
 			changes++;
-			if (cciss_scsi_add_entry(ctlr, hostno, 
-
-				&sd[i].scsi3addr[0], sd[i].devtype,
+			if (cciss_scsi_add_entry(ctlr, hostno, &sd[i],
 				added, &nadded) != 0)
 				break;
 		} else if (found == 1) {
 			/* should never happen... */
 			changes++;
-			printk("cciss%d: device unexpectedly changed type\n",
-				ctlr);
+			printk(KERN_WARNING "cciss%d: device "
+				"unexpectedly changed\n", ctlr);
 			/* but if it does happen, we just ignore that device */
 		}
 	}
@@ -1012,7 +1034,8 @@
 
 static int
 cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, 
-		 unsigned char *buf, unsigned char bufsize)
+	unsigned char page, unsigned char *buf,
+	unsigned char bufsize)
 {
 	int rc;
 	CommandList_struct *cp;
@@ -1032,8 +1055,8 @@
 	ei = cp->err_info; 
 
 	cdb[0] = CISS_INQUIRY;
-	cdb[1] = 0;
-	cdb[2] = 0;
+	cdb[1] = (page != 0);
+	cdb[2] = page;
 	cdb[3] = 0;
 	cdb[4] = bufsize;
 	cdb[5] = 0;
@@ -1053,6 +1076,25 @@
 	return rc;	
 }
 
+/* Get the device id from inquiry page 0x83 */
+static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr,
+	unsigned char *device_id, int buflen)
+{
+	int rc;
+	unsigned char *buf;
+
+	if (buflen > 16)
+		buflen = 16;
+	buf = kzalloc(64, GFP_KERNEL);
+	if (!buf)
+		return -1;
+	rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64);
+	if (rc == 0)
+		memcpy(device_id, &buf[8], buflen);
+	kfree(buf);
+	return rc != 0;
+}
+
 static int
 cciss_scsi_do_report_phys_luns(ctlr_info_t *c, 
 		ReportLunData_struct *buf, int bufsize)
@@ -1142,25 +1184,21 @@
 	ctlr_info_t *c;
 	__u32 num_luns=0;
 	unsigned char *ch;
-	/* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */
-	struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
+	struct cciss_scsi_dev_t *currentsd, *this_device;
 	int ncurrent=0;
 	int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8;
 	int i;
 
 	c = (ctlr_info_t *) hba[cntl_num];	
 	ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
-	if (ld_buff == NULL) {
-		printk(KERN_ERR "cciss: out of memory\n");
-		return;
-	}
 	inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
-        if (inq_buff == NULL) {
-                printk(KERN_ERR "cciss: out of memory\n");
-                kfree(ld_buff);
-                return;
+	currentsd = kzalloc(sizeof(*currentsd) *
+			(CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL);
+	if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) {
+		printk(KERN_ERR "cciss: out of memory\n");
+		goto out;
 	}
-
+	this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
 	if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) {
 		ch = &ld_buff->LUNListLength[0];
 		num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
@@ -1179,23 +1217,34 @@
 
 
 	/* adjust our table of devices */	
-	for(i=0; i<num_luns; i++)
-	{
-		int devtype;
-
+	for (i = 0; i < num_luns; i++) {
 		/* for each physical lun, do an inquiry */
 		if (ld_buff->LUN[i][3] & 0xC0) continue;
 		memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
 		memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);
 
-		if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff,
-			(unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
+		if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff,
+			(unsigned char) OBDR_TAPE_INQ_SIZE) != 0)
 			/* Inquiry failed (msg printed already) */
-			devtype = 0; /* so we will skip this device. */
-		} else /* what kind of device is this? */
-			devtype = (inq_buff[0] & 0x1f);
+			continue; /* so we will skip this device. */
 
-		switch (devtype)
+		this_device->devtype = (inq_buff[0] & 0x1f);
+		this_device->bus = -1;
+		this_device->target = -1;
+		this_device->lun = -1;
+		memcpy(this_device->scsi3addr, scsi3addr, 8);
+		memcpy(this_device->vendor, &inq_buff[8],
+			sizeof(this_device->vendor));
+		memcpy(this_device->model, &inq_buff[16],
+			sizeof(this_device->model));
+		memcpy(this_device->revision, &inq_buff[32],
+			sizeof(this_device->revision));
+		memset(this_device->device_id, 0,
+			sizeof(this_device->device_id));
+		cciss_scsi_get_device_id(hba[cntl_num], scsi3addr,
+			this_device->device_id, sizeof(this_device->device_id));
+
+		switch (this_device->devtype)
 		{
 		  case 0x05: /* CD-ROM */ {
 
@@ -1220,15 +1269,10 @@
 			if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
 				printk(KERN_INFO "cciss%d: %s ignored, "
 					"too many devices.\n", cntl_num,
-					scsi_device_type(devtype));
+					scsi_device_type(this_device->devtype));
 				break;
 			}
-			memcpy(&currentsd[ncurrent].scsi3addr[0], 
-				&scsi3addr[0], 8);
-			currentsd[ncurrent].devtype = devtype;
-			currentsd[ncurrent].bus = -1;
-			currentsd[ncurrent].target = -1;
-			currentsd[ncurrent].lun = -1;
+			currentsd[ncurrent] = *this_device;
 			ncurrent++;
 			break;
 		  default: 
@@ -1240,6 +1284,7 @@
 out:
 	kfree(inq_buff);
 	kfree(ld_buff);
+	kfree(currentsd);
 	return;
 }
 
diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h
index d9c2c58..7b75024 100644
--- a/drivers/block/cciss_scsi.h
+++ b/drivers/block/cciss_scsi.h
@@ -66,6 +66,10 @@
 	int devtype;
 	int bus, target, lun;		/* as presented to the OS */
 	unsigned char scsi3addr[8];	/* as presented to the HW */
+	unsigned char device_id[16];	/* from inquiry pg. 0x83 */
+	unsigned char vendor[8];	/* bytes 8-15 of inquiry data */
+	unsigned char model[16];	/* bytes 16-31 of inquiry data */
+	unsigned char revision[4];	/* bytes 32-35 of inquiry data */
 };
 
 struct cciss_scsi_hba_t {
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 09c1434..3d96752 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -424,7 +424,7 @@
 		hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t),
 		&(hba[i]->cmd_pool_dhandle));
 	hba[i]->cmd_pool_bits = kcalloc(
-		(NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long),
+		DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long),
 		GFP_KERNEL);
 
 	if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 395f8ea..cf64ddf 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -423,8 +423,15 @@
  * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical
  * side 0 is on physical side 0 (but with the misnamed sector IDs).
  * 'stretch' should probably be renamed to something more general, like
- * 'options'.  Other parameters should be self-explanatory (see also
- * setfdprm(8)).
+ * 'options'.
+ *
+ * Bits 2 through 9 of 'stretch' tell the number of the first sector.
+ * The LSB (bit 2) is flipped. For most disks, the first sector
+ * is 1 (represented by 0x00<<2).  For some CP/M and music sampler
+ * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2).
+ * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2).
+ *
+ * Other parameters should be self-explanatory (see also setfdprm(8)).
  */
 /*
 	    Size
@@ -1355,20 +1362,20 @@
 	}
 
 	/* Convert step rate from microseconds to milliseconds and 4 bits */
-	srt = 16 - (DP->srt * scale_dtr / 1000 + NOMINAL_DTR - 1) / NOMINAL_DTR;
+	srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR);
 	if (slow_floppy) {
 		srt = srt / 4;
 	}
 	SUPBOUND(srt, 0xf);
 	INFBOUND(srt, 0);
 
-	hlt = (DP->hlt * scale_dtr / 2 + NOMINAL_DTR - 1) / NOMINAL_DTR;
+	hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR);
 	if (hlt < 0x01)
 		hlt = 0x01;
 	else if (hlt > 0x7f)
 		hlt = hlt_max_code;
 
-	hut = (DP->hut * scale_dtr / 16 + NOMINAL_DTR - 1) / NOMINAL_DTR;
+	hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR);
 	if (hut < 0x1)
 		hut = 0x1;
 	else if (hut > 0xf)
@@ -2236,9 +2243,9 @@
 			}
 		}
 	}
-	if (_floppy->stretch & FD_ZEROBASED) {
+	if (_floppy->stretch & FD_SECTBASEMASK) {
 		for (count = 0; count < F_SECT_PER_TRACK; count++)
-			here[count].sect--;
+			here[count].sect += FD_SECTBASE(_floppy) - 1;
 	}
 }
 
@@ -2385,7 +2392,7 @@
 
 #ifdef FLOPPY_SANITY_CHECK
 	if (nr_sectors / ssize >
-	    (in_sector_offset + current_count_sectors + ssize - 1) / ssize) {
+	    DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) {
 		DPRINT("long rw: %x instead of %lx\n",
 		       nr_sectors, current_count_sectors);
 		printk("rs=%d s=%d\n", R_SECTOR, SECTOR);
@@ -2649,7 +2656,7 @@
 	}
 	HEAD = fsector_t / _floppy->sect;
 
-	if (((_floppy->stretch & (FD_SWAPSIDES | FD_ZEROBASED)) ||
+	if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) ||
 	     TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect)
 		max_sector = _floppy->sect;
 
@@ -2679,7 +2686,7 @@
 	CODE2SIZE;
 	SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE;
 	SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) +
-	    ((_floppy->stretch & FD_ZEROBASED) ? 0 : 1);
+	    FD_SECTBASE(_floppy);
 
 	/* tracksize describes the size which can be filled up with sectors
 	 * of size ssize.
@@ -3311,7 +3318,7 @@
 	    g->head <= 0 ||
 	    g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) ||
 	    /* check if reserved bits are set */
-	    (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_ZEROBASED)) != 0)
+	    (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0)
 		return -EINVAL;
 	if (type) {
 		if (!capable(CAP_SYS_ADMIN))
@@ -3356,7 +3363,7 @@
 		if (DRS->maxblock > user_params[drive].sect ||
 		    DRS->maxtrack ||
 		    ((user_params[drive].sect ^ oldStretch) &
-		     (FD_SWAPSIDES | FD_ZEROBASED)))
+		     (FD_SWAPSIDES | FD_SECTBASEMASK)))
 			invalidate_drive(bdev);
 		else
 			process_fd_request();
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1778e4a..7b33512 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -403,7 +403,7 @@
 	BUG_ON(lo->magic != LO_MAGIC);
 
 	lo->pid = current->pid;
-	ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr);
+	ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
 	if (ret) {
 		printk(KERN_ERR "nbd: sysfs_create_file failed!");
 		return ret;
@@ -412,7 +412,7 @@
 	while ((req = nbd_read_stat(lo)) != NULL)
 		nbd_end_request(req);
 
-	sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr);
+	sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
 	return 0;
 }
 
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 29b7a64..0e07715 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2544,7 +2544,7 @@
 		if (last_zone != zone) {
 			BUG_ON(last_zone != zone + pd->settings.size);
 			first_sectors = last_zone - bio->bi_sector;
-			bp = bio_split(bio, bio_split_pool, first_sectors);
+			bp = bio_split(bio, first_sectors);
 			BUG_ON(!bp);
 			pkt_make_request(q, &bp->bio1);
 			pkt_make_request(q, &bp->bio2);
@@ -2911,7 +2911,7 @@
 	if (!disk->queue)
 		goto out_mem2;
 
-	pd->pkt_dev = MKDEV(disk->major, disk->first_minor);
+	pd->pkt_dev = MKDEV(pktdev_major, idx);
 	ret = pkt_new_dev(pd, dev);
 	if (ret)
 		goto out_new_dev;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index d797e20..936466f 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -199,7 +199,8 @@
 		if (blk_fs_request(req)) {
 			if (ps3disk_submit_request_sg(dev, req))
 				break;
-		} else if (req->cmd_type == REQ_TYPE_FLUSH) {
+		} else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+			   req->cmd[0] == REQ_LB_OP_FLUSH) {
 			if (ps3disk_submit_flush_request(dev, req))
 				break;
 		} else {
@@ -257,7 +258,8 @@
 		return IRQ_HANDLED;
 	}
 
-	if (req->cmd_type == REQ_TYPE_FLUSH) {
+	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	    req->cmd[0] == REQ_LB_OP_FLUSH) {
 		read = 0;
 		num_sectors = req->hard_cur_sectors;
 		op = "flush";
@@ -405,7 +407,8 @@
 
 	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
-	req->cmd_type = REQ_TYPE_FLUSH;
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	req->cmd[0] = REQ_LB_OP_FLUSH;
 }
 
 static unsigned long ps3disk_mask;
@@ -538,7 +541,7 @@
 	struct ps3disk_private *priv = dev->sbd.core.driver_data;
 
 	mutex_lock(&ps3disk_mask_mutex);
-	__clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS,
+	__clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS,
 		    &ps3disk_mask);
 	mutex_unlock(&ps3disk_mask_mutex);
 	del_gendisk(priv->gendisk);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4225109..6ec5fc0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -47,20 +47,20 @@
 
 	spin_lock_irqsave(&vblk->lock, flags);
 	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
-		int uptodate;
+		int error;
 		switch (vbr->status) {
 		case VIRTIO_BLK_S_OK:
-			uptodate = 1;
+			error = 0;
 			break;
 		case VIRTIO_BLK_S_UNSUPP:
-			uptodate = -ENOTTY;
+			error = -ENOTTY;
 			break;
 		default:
-			uptodate = 0;
+			error = -EIO;
 			break;
 		}
 
-		end_dequeued_request(vbr->req, uptodate);
+		__blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req));
 		list_del(&vbr->list);
 		mempool_free(vbr, vblk->pool);
 	}
@@ -84,11 +84,11 @@
 	if (blk_fs_request(vbr->req)) {
 		vbr->out_hdr.type = 0;
 		vbr->out_hdr.sector = vbr->req->sector;
-		vbr->out_hdr.ioprio = vbr->req->ioprio;
+		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 	} else if (blk_pc_request(vbr->req)) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
 		vbr->out_hdr.sector = 0;
-		vbr->out_hdr.ioprio = vbr->req->ioprio;
+		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 	} else {
 		/* We don't put anything else in the queue. */
 		BUG();
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3ca643c..bff602c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -105,15 +105,17 @@
 #define GRANT_INVALID_REF	0
 
 #define PARTS_PER_DISK		16
+#define PARTS_PER_EXT_DISK      256
 
 #define BLKIF_MAJOR(dev) ((dev)>>8)
 #define BLKIF_MINOR(dev) ((dev) & 0xff)
 
-#define DEV_NAME	"xvd"	/* name in /dev */
+#define EXT_SHIFT 28
+#define EXTENDED (1<<EXT_SHIFT)
+#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
+#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
 
-/* Information about our VBDs. */
-#define MAX_VBDS 64
-static LIST_HEAD(vbds_list);
+#define DEV_NAME	"xvd"	/* name in /dev */
 
 static int get_id_from_freelist(struct blkfront_info *info)
 {
@@ -386,31 +388,60 @@
 }
 
 
-static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
-			       int vdevice, u16 vdisk_info, u16 sector_size,
-			       struct blkfront_info *info)
+static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+			       struct blkfront_info *info,
+			       u16 vdisk_info, u16 sector_size)
 {
 	struct gendisk *gd;
 	int nr_minors = 1;
 	int err = -ENODEV;
+	unsigned int offset;
+	int minor;
+	int nr_parts;
 
 	BUG_ON(info->gd != NULL);
 	BUG_ON(info->rq != NULL);
 
-	if ((minor % PARTS_PER_DISK) == 0)
-		nr_minors = PARTS_PER_DISK;
+	if ((info->vdevice>>EXT_SHIFT) > 1) {
+		/* this is above the extended range; something is wrong */
+		printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
+		return -ENODEV;
+	}
+
+	if (!VDEV_IS_EXTENDED(info->vdevice)) {
+		minor = BLKIF_MINOR(info->vdevice);
+		nr_parts = PARTS_PER_DISK;
+	} else {
+		minor = BLKIF_MINOR_EXT(info->vdevice);
+		nr_parts = PARTS_PER_EXT_DISK;
+	}
+
+	if ((minor % nr_parts) == 0)
+		nr_minors = nr_parts;
 
 	gd = alloc_disk(nr_minors);
 	if (gd == NULL)
 		goto out;
 
-	if (nr_minors > 1)
-		sprintf(gd->disk_name, "%s%c", DEV_NAME,
-			'a' + minor / PARTS_PER_DISK);
-	else
-		sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
-			'a' + minor / PARTS_PER_DISK,
-			minor % PARTS_PER_DISK);
+	offset = minor / nr_parts;
+
+	if (nr_minors > 1) {
+		if (offset < 26)
+			sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
+		else
+			sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
+				'a' + ((offset / 26)-1), 'a' + (offset % 26));
+	} else {
+		if (offset < 26)
+			sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
+				'a' + offset,
+				minor & (nr_parts - 1));
+		else
+			sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
+				'a' + ((offset / 26) - 1),
+				'a' + (offset % 26),
+				minor & (nr_parts - 1));
+	}
 
 	gd->major = XENVBD_MAJOR;
 	gd->first_minor = minor;
@@ -699,8 +730,13 @@
 	err = xenbus_scanf(XBT_NIL, dev->nodename,
 			   "virtual-device", "%i", &vdevice);
 	if (err != 1) {
-		xenbus_dev_fatal(dev, err, "reading virtual-device");
-		return err;
+		/* go looking in the extended area instead */
+		err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
+				   "%i", &vdevice);
+		if (err != 1) {
+			xenbus_dev_fatal(dev, err, "reading virtual-device");
+			return err;
+		}
 	}
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -861,9 +897,7 @@
 	if (err)
 		info->feature_barrier = 0;
 
-	err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
-				  sectors, info->vdevice,
-				  binfo, sector_size, info);
+	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 				 info->xbdev->otherend);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 74031de..d47f2f8 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2097,7 +2097,7 @@
 
 		len = nr * CD_FRAMESIZE_RAW;
 
-		ret = blk_rq_map_user(q, rq, ubuf, len);
+		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret)
 			break;
 
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1231d95..d6ba77a 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -624,14 +624,14 @@
 		ctrl_outb(1, GDROM_DMA_STATUS_REG);
 		wait_event_interruptible_timeout(request_queue,
 			gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
-		err = gd.transfer;
+		err = gd.transfer ? -EIO : 0;
 		gd.transfer = 0;
 		gd.pending = 0;
 		/* now seek to take the request spinlock
 		* before handling ending the request */
 		spin_lock(&gdrom_lock);
 		list_del_init(&req->queuelist);
-		end_dequeued_request(req, 1 - err);
+		__blk_end_request(req, err, blk_rq_bytes(req));
 	}
 	spin_unlock(&gdrom_lock);
 	kfree(read_command);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 7ce1ac4..6af435b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -661,10 +661,10 @@
 	if (!disk || !disk->random)
 		return;
 	/* first major is 1, so we get >= 0x200 here */
-	DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor);
+	DEBUG_ENT("disk event %d:%d\n",
+		  MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)));
 
-	add_timer_randomness(disk->random,
-			     0x100 + MKDEV(disk->major, disk->first_minor));
+	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
 }
 #endif
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index f16bb46..03c2cb6 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1113,7 +1113,7 @@
 
 	if (write) {
 		/* disk has become write protected */
-		if (cd->disk->policy) {
+		if (get_disk_ro(cd->disk)) {
 			cdrom_end_request(drive, 0);
 			return ide_stopped;
 		}
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 07ef88b..33ea8c0 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -41,6 +41,12 @@
 #include <asm/io.h>
 #include <asm/div64.h>
 
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
+#define IDE_DISK_MINORS		(1 << PARTN_BITS)
+#else
+#define IDE_DISK_MINORS		0
+#endif
+
 struct ide_disk_obj {
 	ide_drive_t	*drive;
 	ide_driver_t	*driver;
@@ -1151,8 +1157,7 @@
 	if (!idkp)
 		goto failed;
 
-	g = alloc_disk_node(1 << PARTN_BITS,
-			hwif_to_node(drive->hwif));
+	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
 	if (!g)
 		goto out_free_idkp;
 
@@ -1178,9 +1183,11 @@
 	} else
 		drive->attach = 1;
 
-	g->minors = 1 << PARTN_BITS;
+	g->minors = IDE_DISK_MINORS;
 	g->driverfs_dev = &drive->gendev;
-	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
+	g->flags |= GENHD_FL_EXT_DEVT;
+	if (drive->removable)
+		g->flags |= GENHD_FL_REMOVABLE;
 	set_capacity(g, idedisk_capacity(drive));
 	g->fops = &idedisk_ops;
 	add_disk(g);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index a51a30e..70aa86c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1188,7 +1188,7 @@
 {
 	struct gendisk *p = data;
 	*part &= (1 << PARTN_BITS) - 1;
-	return &p->dev.kobj;
+	return &disk_to_dev(p)->kobj;
 }
 
 static int exact_lock(dev_t dev, void *data)
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 90e19f1..dca401d 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -426,7 +426,7 @@
 				old_nl->next = (uint32_t) ((void *) nl -
 							   (void *) old_nl);
 			disk = dm_disk(hc->md);
-			nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
+			nl->dev = huge_encode_dev(disk_devt(disk));
 			nl->next = 0;
 			strcpy(nl->name, hc->name);
 
@@ -539,7 +539,7 @@
 	if (dm_suspended(md))
 		param->flags |= DM_SUSPEND_FLAG;
 
-	param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
+	param->dev = huge_encode_dev(disk_devt(disk));
 
 	/*
 	 * Yes, this will be out of date by the time it gets back
@@ -548,7 +548,7 @@
 	 */
 	param->open_count = dm_open_count(md);
 
-	if (disk->policy)
+	if (get_disk_ro(disk))
 		param->flags |= DM_READONLY_FLAG;
 
 	param->event_nr = dm_get_event_nr(md);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index b2ab848..103304c 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -34,6 +34,7 @@
 	unsigned fail_count;		/* Cumulative failure count */
 
 	struct dm_path path;
+	struct work_struct deactivate_path;
 };
 
 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -113,6 +114,7 @@
 static void process_queued_ios(struct work_struct *work);
 static void trigger_event(struct work_struct *work);
 static void activate_path(struct work_struct *work);
+static void deactivate_path(struct work_struct *work);
 
 
 /*-----------------------------------------------
@@ -123,8 +125,10 @@
 {
 	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
 
-	if (pgpath)
+	if (pgpath) {
 		pgpath->is_active = 1;
+		INIT_WORK(&pgpath->deactivate_path, deactivate_path);
+	}
 
 	return pgpath;
 }
@@ -134,6 +138,14 @@
 	kfree(pgpath);
 }
 
+static void deactivate_path(struct work_struct *work)
+{
+	struct pgpath *pgpath =
+		container_of(work, struct pgpath, deactivate_path);
+
+	blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
+}
+
 static struct priority_group *alloc_priority_group(void)
 {
 	struct priority_group *pg;
@@ -873,6 +885,7 @@
 		      pgpath->path.dev->name, m->nr_valid_paths);
 
 	queue_work(kmultipathd, &m->trigger_event);
+	queue_work(kmultipathd, &pgpath->deactivate_path);
 
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4de90ab..b745d8a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -284,8 +284,8 @@
 
 	memset(major_minor, 0, sizeof(major_minor));
 	sprintf(major_minor, "%d:%d",
-		bio->bi_bdev->bd_disk->major,
-		bio->bi_bdev->bd_disk->first_minor);
+		MAJOR(disk_devt(bio->bi_bdev->bd_disk)),
+		MINOR(disk_devt(bio->bi_bdev->bd_disk)));
 
 	/*
 	 * Test to see which stripe drive triggered the event
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ace998c..327de03 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -377,13 +377,14 @@
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
+	int cpu;
 
 	io->start_time = jiffies;
 
-	preempt_disable();
-	disk_round_stats(dm_disk(md));
-	preempt_enable();
-	dm_disk(md)->in_flight = atomic_inc_return(&md->pending);
+	cpu = part_stat_lock();
+	part_round_stats(cpu, &dm_disk(md)->part0);
+	part_stat_unlock();
+	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
 }
 
 static int end_io_acct(struct dm_io *io)
@@ -391,15 +392,16 @@
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->bio;
 	unsigned long duration = jiffies - io->start_time;
-	int pending;
+	int pending, cpu;
 	int rw = bio_data_dir(bio);
 
-	preempt_disable();
-	disk_round_stats(dm_disk(md));
-	preempt_enable();
-	dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);
+	cpu = part_stat_lock();
+	part_round_stats(cpu, &dm_disk(md)->part0);
+	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
+	part_stat_unlock();
 
-	disk_stat_add(dm_disk(md), ticks[rw], duration);
+	dm_disk(md)->part0.in_flight = pending =
+		atomic_dec_return(&md->pending);
 
 	return !pending;
 }
@@ -885,6 +887,7 @@
 	int r = -EIO;
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
+	int cpu;
 
 	/*
 	 * There is no use in forwarding any barrier request since we can't
@@ -897,8 +900,10 @@
 
 	down_read(&md->io_lock);
 
-	disk_stat_inc(dm_disk(md), ios[rw]);
-	disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
+	part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
+	part_stat_unlock();
 
 	/*
 	 * If we're suspended we have to queue
@@ -1146,7 +1151,7 @@
 
 static void free_dev(struct mapped_device *md)
 {
-	int minor = md->disk->first_minor;
+	int minor = MINOR(disk_devt(md->disk));
 
 	if (md->suspended_bdev) {
 		unlock_fs(md);
@@ -1182,7 +1187,7 @@
 	list_splice_init(&md->uevent_list, &uevents);
 	spin_unlock_irqrestore(&md->uevent_lock, flags);
 
-	dm_send_uevents(&uevents, &md->disk->dev.kobj);
+	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
 
 	atomic_inc(&md->event_nr);
 	wake_up(&md->eventq);
@@ -1267,7 +1272,7 @@
 
 	md = idr_find(&_minor_idr, minor);
 	if (md && (md == MINOR_ALLOCED ||
-		   (dm_disk(md)->first_minor != minor) ||
+		   (MINOR(disk_devt(dm_disk(md))) != minor) ||
 		   test_bit(DMF_FREEING, &md->flags))) {
 		md = NULL;
 		goto out;
@@ -1318,7 +1323,8 @@
 
 	if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
 		map = dm_get_table(md);
-		idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor);
+		idr_replace(&_minor_idr, MINOR_ALLOCED,
+			    MINOR(disk_devt(dm_disk(md))));
 		set_bit(DMF_FREEING, &md->flags);
 		spin_unlock(&_minor_lock);
 		if (!dm_suspended(md)) {
@@ -1638,7 +1644,7 @@
  *---------------------------------------------------------------*/
 void dm_kobject_uevent(struct mapped_device *md)
 {
-	kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE);
+	kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
 }
 
 uint32_t dm_next_uevent_seq(struct mapped_device *md)
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index b1eebf8..b9cbee68 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -318,14 +318,18 @@
 	mddev_t *mddev = q->queuedata;
 	dev_info_t *tmp_dev;
 	sector_t block;
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	tmp_dev = which_dev(mddev, bio->bi_sector);
 	block = bio->bi_sector >> 1;
@@ -349,7 +353,7 @@
 		 * split it.
 		 */
 		struct bio_pair *bp;
-		bp = bio_split(bio, bio_split_pool,
+		bp = bio_split(bio,
 			       ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector);
 		if (linear_make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index deeac4b..0a3a4bd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1464,10 +1464,7 @@
 	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
 		goto fail;
 
-	if (rdev->bdev->bd_part)
-		ko = &rdev->bdev->bd_part->dev.kobj;
-	else
-		ko = &rdev->bdev->bd_disk->dev.kobj;
+	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
 	if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
 		kobject_del(&rdev->kobj);
 		goto fail;
@@ -3470,8 +3467,8 @@
 	disk->queue = mddev->queue;
 	add_disk(disk);
 	mddev->gendisk = disk;
-	error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj,
-				     "%s", "md");
+	error = kobject_init_and_add(&mddev->kobj, &md_ktype,
+				     &disk_to_dev(disk)->kobj, "%s", "md");
 	mutex_unlock(&disks_mutex);
 	if (error)
 		printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
@@ -3761,7 +3758,7 @@
 	sysfs_notify(&mddev->kobj, NULL, "array_state");
 	sysfs_notify(&mddev->kobj, NULL, "sync_action");
 	sysfs_notify(&mddev->kobj, NULL, "degraded");
-	kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE);
+	kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
 	return 0;
 }
 
@@ -5549,8 +5546,8 @@
 	rcu_read_lock();
 	rdev_for_each_rcu(rdev, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
-		curr_events = disk_stat_read(disk, sectors[0]) + 
-				disk_stat_read(disk, sectors[1]) - 
+		curr_events = part_stat_read(&disk->part0, sectors[0]) +
+				part_stat_read(&disk->part0, sectors[1]) -
 				atomic_read(&disk->sync_io);
 		/* sync IO will cause sync_io to increase before the disk_stats
 		 * as sync_io is counted when a request starts, and
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index c4779cc..8bb8794 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -147,6 +147,7 @@
 	struct multipath_bh * mp_bh;
 	struct multipath_info *multipath;
 	const int rw = bio_data_dir(bio);
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
@@ -158,8 +159,11 @@
 	mp_bh->master_bio = bio;
 	mp_bh->mddev = mddev;
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	mp_bh->path = multipath_map(conf);
 	if (mp_bh->path < 0) {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 1836106..53508a8 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -399,14 +399,18 @@
 	sector_t chunk;
 	sector_t block, rsect;
 	const int rw = bio_data_dir(bio);
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	chunk_size = mddev->chunk_size >> 10;
 	chunk_sects = mddev->chunk_size >> 9;
@@ -423,7 +427,7 @@
 		/* This is a one page bio that upper layers
 		 * refuse to split for us, so we need to split it.
 		 */
-		bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
+		bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)));
 		if (raid0_make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);
 		if (raid0_make_request(q, &bp->bio2))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 03a5ab7..b976442 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -779,7 +779,7 @@
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
 	const int do_sync = bio_sync(bio);
-	int do_barriers;
+	int cpu, do_barriers;
 	mdk_rdev_t *blocked_rdev;
 
 	/*
@@ -804,8 +804,11 @@
 
 	bitmap = mddev->bitmap;
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	/*
 	 * make_request() can abort the operation when READA is being
@@ -1302,9 +1305,6 @@
 					sbio->bi_size = r1_bio->sectors << 9;
 					sbio->bi_idx = 0;
 					sbio->bi_phys_segments = 0;
-					sbio->bi_hw_segments = 0;
-					sbio->bi_hw_front_size = 0;
-					sbio->bi_hw_back_size = 0;
 					sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
 					sbio->bi_flags |= 1 << BIO_UPTODATE;
 					sbio->bi_next = NULL;
@@ -1790,7 +1790,6 @@
 		bio->bi_vcnt = 0;
 		bio->bi_idx = 0;
 		bio->bi_phys_segments = 0;
-		bio->bi_hw_segments = 0;
 		bio->bi_size = 0;
 		bio->bi_end_io = NULL;
 		bio->bi_private = NULL;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e34cd0e..8bdc9bf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -789,6 +789,7 @@
 	mirror_info_t *mirror;
 	r10bio_t *r10_bio;
 	struct bio *read_bio;
+	int cpu;
 	int i;
 	int chunk_sects = conf->chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
@@ -816,7 +817,7 @@
 		/* This is a one page bio that upper layers
 		 * refuse to split for us, so we need to split it.
 		 */
-		bp = bio_split(bio, bio_split_pool,
+		bp = bio_split(bio,
 			       chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
 		if (make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);
@@ -843,8 +844,11 @@
 	 */
 	wait_barrier(conf);
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
@@ -1345,9 +1349,6 @@
 		tbio->bi_size = r10_bio->sectors << 9;
 		tbio->bi_idx = 0;
 		tbio->bi_phys_segments = 0;
-		tbio->bi_hw_segments = 0;
-		tbio->bi_hw_front_size = 0;
-		tbio->bi_hw_back_size = 0;
 		tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
 		tbio->bi_flags |= 1 << BIO_UPTODATE;
 		tbio->bi_next = NULL;
@@ -1947,7 +1948,6 @@
 		bio->bi_vcnt = 0;
 		bio->bi_idx = 0;
 		bio->bi_phys_segments = 0;
-		bio->bi_hw_segments = 0;
 		bio->bi_size = 0;
 	}
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 224de02..ae16794 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -101,6 +101,40 @@
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
 #endif
 
+/*
+ * We maintain a biased count of active stripes in the bottom 16 bits of
+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
+ */
+static inline int raid5_bi_phys_segments(struct bio *bio)
+{
+	return bio->bi_phys_segments & 0xffff;
+}
+
+static inline int raid5_bi_hw_segments(struct bio *bio)
+{
+	return (bio->bi_phys_segments >> 16) & 0xffff;
+}
+
+static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+{
+	--bio->bi_phys_segments;
+	return raid5_bi_phys_segments(bio);
+}
+
+static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+{
+	unsigned short val = raid5_bi_hw_segments(bio);
+
+	--val;
+	bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
+	return val;
+}
+
+static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+{
+	bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16);
+}
+
 static inline int raid6_next_disk(int disk, int raid_disks)
 {
 	disk++;
@@ -507,7 +541,7 @@
 			while (rbi && rbi->bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				rbi2 = r5_next_bio(rbi, dev->sector);
-				if (--rbi->bi_phys_segments == 0) {
+				if (!raid5_dec_bi_phys_segments(rbi)) {
 					rbi->bi_next = return_bi;
 					return_bi = rbi;
 				}
@@ -1725,7 +1759,7 @@
 	if (*bip)
 		bi->bi_next = *bip;
 	*bip = bi;
-	bi->bi_phys_segments ++;
+	bi->bi_phys_segments++;
 	spin_unlock_irq(&conf->device_lock);
 	spin_unlock(&sh->lock);
 
@@ -1819,7 +1853,7 @@
 			sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
-			if (--bi->bi_phys_segments == 0) {
+			if (!raid5_dec_bi_phys_segments(bi)) {
 				md_write_end(conf->mddev);
 				bi->bi_next = *return_bi;
 				*return_bi = bi;
@@ -1834,7 +1868,7 @@
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
-			if (--bi->bi_phys_segments == 0) {
+			if (!raid5_dec_bi_phys_segments(bi)) {
 				md_write_end(conf->mddev);
 				bi->bi_next = *return_bi;
 				*return_bi = bi;
@@ -1858,7 +1892,7 @@
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
 				clear_bit(BIO_UPTODATE, &bi->bi_flags);
-				if (--bi->bi_phys_segments == 0) {
+				if (!raid5_dec_bi_phys_segments(bi)) {
 					bi->bi_next = *return_bi;
 					*return_bi = bi;
 				}
@@ -2033,7 +2067,7 @@
 				while (wbi && wbi->bi_sector <
 					dev->sector + STRIPE_SECTORS) {
 					wbi2 = r5_next_bio(wbi, dev->sector);
-					if (--wbi->bi_phys_segments == 0) {
+					if (!raid5_dec_bi_phys_segments(wbi)) {
 						md_write_end(conf->mddev);
 						wbi->bi_next = *return_bi;
 						*return_bi = wbi;
@@ -2814,7 +2848,7 @@
 				copy_data(0, rbi, dev->page, dev->sector);
 				rbi2 = r5_next_bio(rbi, dev->sector);
 				spin_lock_irq(&conf->device_lock);
-				if (--rbi->bi_phys_segments == 0) {
+				if (!raid5_dec_bi_phys_segments(rbi)) {
 					rbi->bi_next = return_bi;
 					return_bi = rbi;
 				}
@@ -3155,8 +3189,11 @@
 	if(bi) {
 		conf->retry_read_aligned_list = bi->bi_next;
 		bi->bi_next = NULL;
+		/*
+		 * this sets the active strip count to 1 and the processed
+		 * strip count to zero (upper 8 bits)
+		 */
 		bi->bi_phys_segments = 1; /* biased count of active stripes */
-		bi->bi_hw_segments = 0; /* count of processed stripes */
 	}
 
 	return bi;
@@ -3206,8 +3243,7 @@
 	if ((bi->bi_size>>9) > q->max_sectors)
 		return 0;
 	blk_recount_segments(q, bi);
-	if (bi->bi_phys_segments > q->max_phys_segments ||
-	    bi->bi_hw_segments > q->max_hw_segments)
+	if (bi->bi_phys_segments > q->max_phys_segments)
 		return 0;
 
 	if (q->merge_bvec_fn)
@@ -3351,7 +3387,7 @@
 	sector_t logical_sector, last_sector;
 	struct stripe_head *sh;
 	const int rw = bio_data_dir(bi);
-	int remaining;
+	int cpu, remaining;
 
 	if (unlikely(bio_barrier(bi))) {
 		bio_endio(bi, -EOPNOTSUPP);
@@ -3360,8 +3396,11 @@
 
 	md_write_start(mddev, bi);
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bi));
+	part_stat_unlock();
 
 	if (rw == READ &&
 	     mddev->reshape_position == MaxSector &&
@@ -3468,7 +3507,7 @@
 			
 	}
 	spin_lock_irq(&conf->device_lock);
-	remaining = --bi->bi_phys_segments;
+	remaining = raid5_dec_bi_phys_segments(bi);
 	spin_unlock_irq(&conf->device_lock);
 	if (remaining == 0) {
 
@@ -3752,7 +3791,7 @@
 		     sector += STRIPE_SECTORS,
 		     scnt++) {
 
-		if (scnt < raid_bio->bi_hw_segments)
+		if (scnt < raid5_bi_hw_segments(raid_bio))
 			/* already done this stripe */
 			continue;
 
@@ -3760,7 +3799,7 @@
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
-			raid_bio->bi_hw_segments = scnt;
+			raid5_set_bi_hw_segments(raid_bio, scnt);
 			conf->retry_read_aligned = raid_bio;
 			return handled;
 		}
@@ -3768,7 +3807,7 @@
 		set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
 		if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
 			release_stripe(sh);
-			raid_bio->bi_hw_segments = scnt;
+			raid5_set_bi_hw_segments(raid_bio, scnt);
 			conf->retry_read_aligned = raid_bio;
 			return handled;
 		}
@@ -3778,7 +3817,7 @@
 		handled++;
 	}
 	spin_lock_irq(&conf->device_lock);
-	remaining = --raid_bio->bi_phys_segments;
+	remaining = raid5_dec_bi_phys_segments(raid_bio);
 	spin_unlock_irq(&conf->device_lock);
 	if (remaining == 0)
 		bio_endio(raid_bio, 0);
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index d2d2318..6e291bf 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -197,7 +197,7 @@
 static int mspro_block_disk_release(struct gendisk *disk)
 {
 	struct mspro_block_data *msb = disk->private_data;
-	int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT;
+	int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT;
 
 	mutex_lock(&mspro_block_disk_lock);
 
@@ -828,7 +828,7 @@
 
 	if (msb->eject) {
 		while ((req = elv_next_request(q)) != NULL)
-			end_queued_request(req, -ENODEV);
+			__blk_end_request(req, -ENODEV, blk_rq_bytes(req));
 
 		return;
 	}
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index ebc8b9d..efacee0 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -29,6 +29,7 @@
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
+#include <linux/string_helpers.h>
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
@@ -83,7 +84,7 @@
 	mutex_lock(&open_lock);
 	md->usage--;
 	if (md->usage == 0) {
-		int devidx = md->disk->first_minor >> MMC_SHIFT;
+		int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT;
 		__clear_bit(devidx, dev_use);
 
 		put_disk(md->disk);
@@ -532,6 +533,8 @@
 	struct mmc_blk_data *md;
 	int err;
 
+	char cap_str[10];
+
 	/*
 	 * Check that the card supports the command class(es) we need.
 	 */
@@ -546,10 +549,11 @@
 	if (err)
 		goto out;
 
-	printk(KERN_INFO "%s: %s %s %lluKiB %s\n",
+	string_get_size(get_capacity(md->disk) << 9, STRING_UNITS_2,
+			cap_str, sizeof(cap_str));
+	printk(KERN_INFO "%s: %s %s %s %s\n",
 		md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
-		(unsigned long long)(get_capacity(md->disk) >> 1),
-		md->read_only ? "(ro)" : "");
+		cap_str, md->read_only ? "(ro)" : "");
 
 	mmc_set_drvdata(card, md);
 	add_disk(md->disk);
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index f34f20c..9bf581c 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1005,6 +1005,29 @@
 	return ftl_write((void *)dev, buf, block, 1);
 }
 
+static int ftl_discardsect(struct mtd_blktrans_dev *dev,
+			   unsigned long sector, unsigned nr_sects)
+{
+	partition_t *part = (void *)dev;
+	uint32_t bsize = 1 << part->header.EraseUnitSize;
+
+	DEBUG(1, "FTL erase sector %ld for %d sectors\n",
+	      sector, nr_sects);
+
+	while (nr_sects) {
+		uint32_t old_addr = part->VirtualBlockMap[sector];
+		if (old_addr != 0xffffffff) {
+			part->VirtualBlockMap[sector] = 0xffffffff;
+			part->EUNInfo[old_addr/bsize].Deleted++;
+			if (set_bam_entry(part, old_addr, 0))
+				return -EIO;
+		}
+		nr_sects--;
+		sector++;
+	}
+
+	return 0;
+}
 /*====================================================================*/
 
 static void ftl_freepart(partition_t *part)
@@ -1069,6 +1092,7 @@
 	.blksize 	= SECTOR_SIZE,
 	.readsect	= ftl_readsect,
 	.writesect	= ftl_writesect,
+	.discard	= ftl_discardsect,
 	.getgeo		= ftl_getgeo,
 	.add_mtd	= ftl_add_mtd,
 	.remove_dev	= ftl_remove_dev,
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 9ff007c..681d5ac 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,6 +32,14 @@
 	spinlock_t queue_lock;
 };
 
+static int blktrans_discard_request(struct request_queue *q,
+				    struct request *req)
+{
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	req->cmd[0] = REQ_LB_OP_DISCARD;
+	return 0;
+}
+
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
@@ -44,6 +52,10 @@
 
 	buf = req->buffer;
 
+	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	    req->cmd[0] == REQ_LB_OP_DISCARD)
+		return !tr->discard(dev, block, nsect);
+
 	if (!blk_fs_request(req))
 		return 0;
 
@@ -367,6 +379,10 @@
 
 	tr->blkcore_priv->rq->queuedata = tr;
 	blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize);
+	if (tr->discard)
+		blk_queue_set_discard(tr->blkcore_priv->rq,
+				      blktrans_discard_request);
+
 	tr->blkshift = ffs(tr->blksize) - 1;
 
 	tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr,
diff --git a/drivers/pnp/Makefile b/drivers/pnp/Makefile
index 26f5abc..e83f34f 100644
--- a/drivers/pnp/Makefile
+++ b/drivers/pnp/Makefile
@@ -2,12 +2,15 @@
 # Makefile for the Linux Plug-and-Play Support.
 #
 
-obj-y		:= core.o card.o driver.o resource.o manager.o support.o interface.o quirks.o system.o
+obj-y		:= core.o card.o driver.o resource.o manager.o support.o interface.o quirks.o
 
 obj-$(CONFIG_PNPACPI)		+= pnpacpi/
 obj-$(CONFIG_PNPBIOS)		+= pnpbios/
 obj-$(CONFIG_ISAPNP)		+= isapnp/
 
+# pnp_system_init goes after pnpacpi/pnpbios init
+obj-y				+= system.o
+
 ifeq ($(CONFIG_PNP_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
 endif
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index c1b9ea3..53561d7 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -268,7 +268,7 @@
 	return 0;
 }
 
-subsys_initcall(pnpacpi_init);
+fs_initcall(pnpacpi_init);
 
 static int __init pnpacpi_setup(char *str)
 {
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 19a4be1..662dfcdd 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -571,7 +571,7 @@
 	return 0;
 }
 
-subsys_initcall(pnpbios_init);
+fs_initcall(pnpbios_init);
 
 static int __init pnpbios_thread_init(void)
 {
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index 03c0e40..e3b5c4d 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -76,7 +76,8 @@
 	/* Print kdev. */
 	if (block->gdp)
 		seq_printf(m, " at (%3d:%6d)",
-			   block->gdp->major, block->gdp->first_minor);
+			   MAJOR(disk_devt(block->gdp)),
+			   MINOR(disk_devt(block->gdp)));
 	else
 		seq_printf(m, "  at (???:??????)");
 	/* Print device name. */
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 711b300..9481e4a 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -114,7 +114,7 @@
 		found = 0;
 		// test if minor available
 		list_for_each_entry(entry, &dcssblk_devices, lh)
-			if (minor == entry->gd->first_minor)
+			if (minor == MINOR(disk_devt(entry->gd)))
 				found++;
 		if (!found) break; // got unused minor
 	}
@@ -397,7 +397,7 @@
 		goto unload_seg;
 	}
 	sprintf(dev_info->gd->disk_name, "dcssblk%d",
-		dev_info->gd->first_minor);
+		MINOR(disk_devt(dev_info->gd)));
 	list_add_tail(&dev_info->lh, &dcssblk_devices);
 
 	if (!try_module_get(THIS_MODULE)) {
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 90abfd0..24255e4 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -88,11 +88,13 @@
 	strncpy(zfcp_data.init_busid, token, BUS_ID_SIZE);
 
 	token = strsep(&str, ",");
-	if (!token || strict_strtoull(token, 0, &zfcp_data.init_wwpn))
+	if (!token || strict_strtoull(token, 0,
+				(unsigned long long *) &zfcp_data.init_wwpn))
 		goto err_out;
 
 	token = strsep(&str, ",");
-	if (!token || strict_strtoull(token, 0, &zfcp_data.init_fcp_lun))
+	if (!token || strict_strtoull(token, 0,
+				(unsigned long long *) &zfcp_data.init_fcp_lun))
 		goto err_out;
 
 	kfree(str);
@@ -100,24 +102,10 @@
 
  err_out:
 	kfree(str);
-	pr_err("zfcp: Parse error for device parameter string %s, "
-	       "device not attached.\n", devstr);
+	pr_err("zfcp: %s is not a valid SCSI device\n", devstr);
 	return 0;
 }
 
-static struct zfcp_adapter *zfcp_get_adapter_by_busid(char *bus_id)
-{
-	struct zfcp_adapter *adapter;
-
-	list_for_each_entry(adapter, &zfcp_data.adapter_list_head, list)
-		if ((strncmp(bus_id, adapter->ccw_device->dev.bus_id,
-			     BUS_ID_SIZE) == 0) &&
-		    !(atomic_read(&adapter->status) &
-		      ZFCP_STATUS_COMMON_REMOVE))
-		    return adapter;
-	return NULL;
-}
-
 static void __init zfcp_init_device_configure(void)
 {
 	struct zfcp_adapter *adapter;
@@ -141,7 +129,12 @@
 		goto out_unit;
 	up(&zfcp_data.config_sema);
 	ccw_device_set_online(adapter->ccw_device);
+
 	zfcp_erp_wait(adapter);
+	wait_event(adapter->erp_done_wqh,
+		   !(atomic_read(&unit->status) &
+				ZFCP_STATUS_UNIT_SCSI_WORK_PENDING));
+
 	down(&zfcp_data.config_sema);
 	zfcp_unit_put(unit);
 out_unit:
@@ -180,9 +173,9 @@
 	if (!zfcp_data.gid_pn_cache)
 		goto out_gid_cache;
 
-	INIT_LIST_HEAD(&zfcp_data.adapter_list_head);
-	INIT_LIST_HEAD(&zfcp_data.adapter_remove_lh);
+	zfcp_data.work_queue = create_singlethread_workqueue("zfcp_wq");
 
+	INIT_LIST_HEAD(&zfcp_data.adapter_list_head);
 	sema_init(&zfcp_data.config_sema, 1);
 	rwlock_init(&zfcp_data.config_lock);
 
@@ -193,13 +186,14 @@
 
 	retval = misc_register(&zfcp_cfdc_misc);
 	if (retval) {
-		pr_err("zfcp: registration of misc device zfcp_cfdc failed\n");
+		pr_err("zfcp: Registering the misc device zfcp_cfdc failed\n");
 		goto out_misc;
 	}
 
 	retval = zfcp_ccw_register();
 	if (retval) {
-		pr_err("zfcp: Registration with common I/O layer failed.\n");
+		pr_err("zfcp: The zfcp device driver could not register with "
+		       "the common I/O layer\n");
 		goto out_ccw_register;
 	}
 
@@ -231,8 +225,7 @@
  *
  * Returns: pointer to zfcp_unit or NULL
  */
-struct zfcp_unit *zfcp_get_unit_by_lun(struct zfcp_port *port,
-				       fcp_lun_t fcp_lun)
+struct zfcp_unit *zfcp_get_unit_by_lun(struct zfcp_port *port, u64 fcp_lun)
 {
 	struct zfcp_unit *unit;
 
@@ -251,7 +244,7 @@
  * Returns: pointer to zfcp_port or NULL
  */
 struct zfcp_port *zfcp_get_port_by_wwpn(struct zfcp_adapter *adapter,
-					wwn_t wwpn)
+					u64 wwpn)
 {
 	struct zfcp_port *port;
 
@@ -276,7 +269,7 @@
  *
  * Sets up some unit internal structures and creates sysfs entry.
  */
-struct zfcp_unit *zfcp_unit_enqueue(struct zfcp_port *port, fcp_lun_t fcp_lun)
+struct zfcp_unit *zfcp_unit_enqueue(struct zfcp_port *port, u64 fcp_lun)
 {
 	struct zfcp_unit *unit;
 
@@ -290,7 +283,8 @@
 	unit->port = port;
 	unit->fcp_lun = fcp_lun;
 
-	snprintf(unit->sysfs_device.bus_id, BUS_ID_SIZE, "0x%016llx", fcp_lun);
+	snprintf(unit->sysfs_device.bus_id, BUS_ID_SIZE, "0x%016llx",
+		 (unsigned long long) fcp_lun);
 	unit->sysfs_device.parent = &port->sysfs_device;
 	unit->sysfs_device.release = zfcp_sysfs_unit_release;
 	dev_set_drvdata(&unit->sysfs_device, unit);
@@ -323,7 +317,6 @@
 	}
 
 	zfcp_unit_get(unit);
-	unit->scsi_lun = scsilun_to_int((struct scsi_lun *)&unit->fcp_lun);
 
 	write_lock_irq(&zfcp_data.config_lock);
 	list_add_tail(&unit->list, &port->unit_list_head);
@@ -332,7 +325,6 @@
 
 	write_unlock_irq(&zfcp_data.config_lock);
 
-	port->units++;
 	zfcp_port_get(port);
 
 	return unit;
@@ -351,11 +343,10 @@
  */
 void zfcp_unit_dequeue(struct zfcp_unit *unit)
 {
-	zfcp_unit_wait(unit);
+	wait_event(unit->remove_wq, atomic_read(&unit->refcount) == 0);
 	write_lock_irq(&zfcp_data.config_lock);
 	list_del(&unit->list);
 	write_unlock_irq(&zfcp_data.config_lock);
-	unit->port->units--;
 	zfcp_port_put(unit->port);
 	sysfs_remove_group(&unit->sysfs_device.kobj, &zfcp_sysfs_unit_attrs);
 	device_unregister(&unit->sysfs_device);
@@ -416,11 +407,6 @@
 		mempool_destroy(adapter->pool.data_gid_pn);
 }
 
-static void zfcp_dummy_release(struct device *dev)
-{
-	return;
-}
-
 /**
  * zfcp_status_read_refill - refill the long running status_read_requests
  * @adapter: ptr to struct zfcp_adapter for which the buffers should be refilled
@@ -450,19 +436,6 @@
 					     stat_work));
 }
 
-static int zfcp_nameserver_enqueue(struct zfcp_adapter *adapter)
-{
-	struct zfcp_port *port;
-
-	port = zfcp_port_enqueue(adapter, 0, ZFCP_STATUS_PORT_WKA,
-				 ZFCP_DID_DIRECTORY_SERVICE);
-	if (IS_ERR(port))
-		return PTR_ERR(port);
-	zfcp_port_put(port);
-
-	return 0;
-}
-
 /**
  * zfcp_adapter_enqueue - enqueue a new adapter to the list
  * @ccw_device: pointer to the struct cc_device
@@ -508,7 +481,6 @@
 	init_waitqueue_head(&adapter->erp_done_wqh);
 
 	INIT_LIST_HEAD(&adapter->port_list_head);
-	INIT_LIST_HEAD(&adapter->port_remove_lh);
 	INIT_LIST_HEAD(&adapter->erp_ready_head);
 	INIT_LIST_HEAD(&adapter->erp_running_head);
 
@@ -518,7 +490,7 @@
 	spin_lock_init(&adapter->san_dbf_lock);
 	spin_lock_init(&adapter->scsi_dbf_lock);
 	spin_lock_init(&adapter->rec_dbf_lock);
-	spin_lock_init(&adapter->req_q.lock);
+	spin_lock_init(&adapter->req_q_lock);
 
 	rwlock_init(&adapter->erp_lock);
 	rwlock_init(&adapter->abort_lock);
@@ -537,28 +509,15 @@
 			       &zfcp_sysfs_adapter_attrs))
 		goto sysfs_failed;
 
-	adapter->generic_services.parent = &adapter->ccw_device->dev;
-	adapter->generic_services.release = zfcp_dummy_release;
-	snprintf(adapter->generic_services.bus_id, BUS_ID_SIZE,
-		 "generic_services");
-
-	if (device_register(&adapter->generic_services))
-		goto generic_services_failed;
-
 	write_lock_irq(&zfcp_data.config_lock);
 	atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status);
 	list_add_tail(&adapter->list, &zfcp_data.adapter_list_head);
 	write_unlock_irq(&zfcp_data.config_lock);
 
-	zfcp_data.adapters++;
-
-	zfcp_nameserver_enqueue(adapter);
+	zfcp_fc_nameserver_init(adapter);
 
 	return 0;
 
-generic_services_failed:
-	sysfs_remove_group(&ccw_device->dev.kobj,
-			   &zfcp_sysfs_adapter_attrs);
 sysfs_failed:
 	zfcp_adapter_debug_unregister(adapter);
 debug_register_failed:
@@ -585,7 +544,6 @@
 	cancel_work_sync(&adapter->scan_work);
 	cancel_work_sync(&adapter->stat_work);
 	zfcp_adapter_scsi_unregister(adapter);
-	device_unregister(&adapter->generic_services);
 	sysfs_remove_group(&adapter->ccw_device->dev.kobj,
 			   &zfcp_sysfs_adapter_attrs);
 	dev_set_drvdata(&adapter->ccw_device->dev, NULL);
@@ -603,9 +561,6 @@
 	list_del(&adapter->list);
 	write_unlock_irq(&zfcp_data.config_lock);
 
-	/* decrease number of adapters in list */
-	zfcp_data.adapters--;
-
 	zfcp_qdio_free(adapter);
 
 	zfcp_free_low_mem_buffers(adapter);
@@ -633,21 +588,19 @@
  * d_id is used to enqueue ports with a well known address like the Directory
  * Service for nameserver lookup.
  */
-struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, wwn_t wwpn,
+struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn,
 				     u32 status, u32 d_id)
 {
 	struct zfcp_port *port;
 	int retval;
-	char *bus_id;
 
 	port = kzalloc(sizeof(struct zfcp_port), GFP_KERNEL);
 	if (!port)
 		return ERR_PTR(-ENOMEM);
 
 	init_waitqueue_head(&port->remove_wq);
-
 	INIT_LIST_HEAD(&port->unit_list_head);
-	INIT_LIST_HEAD(&port->unit_remove_lh);
+	INIT_WORK(&port->gid_pn_work, zfcp_erp_port_strategy_open_lookup);
 
 	port->adapter = adapter;
 	port->d_id = d_id;
@@ -657,34 +610,9 @@
 	atomic_set_mask(status | ZFCP_STATUS_COMMON_REMOVE, &port->status);
 	atomic_set(&port->refcount, 0);
 
-	if (status & ZFCP_STATUS_PORT_WKA) {
-		switch (d_id) {
-		case ZFCP_DID_DIRECTORY_SERVICE:
-			bus_id = "directory";
-			break;
-		case ZFCP_DID_MANAGEMENT_SERVICE:
-			bus_id = "management";
-			break;
-		case ZFCP_DID_KEY_DISTRIBUTION_SERVICE:
-			bus_id = "key_distribution";
-			break;
-		case ZFCP_DID_ALIAS_SERVICE:
-			bus_id = "alias";
-			break;
-		case ZFCP_DID_TIME_SERVICE:
-			bus_id = "time";
-			break;
-		default:
-			kfree(port);
-			return ERR_PTR(-EINVAL);
-		}
-		snprintf(port->sysfs_device.bus_id, BUS_ID_SIZE, "%s", bus_id);
-		port->sysfs_device.parent = &adapter->generic_services;
-	} else {
-		snprintf(port->sysfs_device.bus_id,
-			 BUS_ID_SIZE, "0x%016llx", wwpn);
-		port->sysfs_device.parent = &adapter->ccw_device->dev;
-	}
+	snprintf(port->sysfs_device.bus_id, BUS_ID_SIZE, "0x%016llx",
+		 (unsigned long long) wwpn);
+	port->sysfs_device.parent = &adapter->ccw_device->dev;
 
 	port->sysfs_device.release = zfcp_sysfs_port_release;
 	dev_set_drvdata(&port->sysfs_device, port);
@@ -700,12 +628,8 @@
 	if (device_register(&port->sysfs_device))
 		goto err_out_free;
 
-	if (status & ZFCP_STATUS_PORT_WKA)
-		retval = sysfs_create_group(&port->sysfs_device.kobj,
-					    &zfcp_sysfs_ns_port_attrs);
-	else
-		retval = sysfs_create_group(&port->sysfs_device.kobj,
-					    &zfcp_sysfs_port_attrs);
+	retval = sysfs_create_group(&port->sysfs_device.kobj,
+				    &zfcp_sysfs_port_attrs);
 
 	if (retval) {
 		device_unregister(&port->sysfs_device);
@@ -718,10 +642,6 @@
 	list_add_tail(&port->list, &adapter->port_list_head);
 	atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &port->status);
 	atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &port->status);
-	if (d_id == ZFCP_DID_DIRECTORY_SERVICE)
-		if (!adapter->nameserver_port)
-			adapter->nameserver_port = port;
-	adapter->ports++;
 
 	write_unlock_irq(&zfcp_data.config_lock);
 
@@ -740,21 +660,15 @@
  */
 void zfcp_port_dequeue(struct zfcp_port *port)
 {
-	zfcp_port_wait(port);
+	wait_event(port->remove_wq, atomic_read(&port->refcount) == 0);
 	write_lock_irq(&zfcp_data.config_lock);
 	list_del(&port->list);
-	port->adapter->ports--;
 	write_unlock_irq(&zfcp_data.config_lock);
 	if (port->rport)
 		fc_remote_port_delete(port->rport);
 	port->rport = NULL;
 	zfcp_adapter_put(port->adapter);
-	if (atomic_read(&port->status) & ZFCP_STATUS_PORT_WKA)
-		sysfs_remove_group(&port->sysfs_device.kobj,
-				   &zfcp_sysfs_ns_port_attrs);
-	else
-		sysfs_remove_group(&port->sysfs_device.kobj,
-				   &zfcp_sysfs_port_attrs);
+	sysfs_remove_group(&port->sysfs_device.kobj, &zfcp_sysfs_port_attrs);
 	device_unregister(&port->sysfs_device);
 }
 
diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index 51b6a05..b04038c 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -25,7 +25,8 @@
 	down(&zfcp_data.config_sema);
 	if (zfcp_adapter_enqueue(ccw_device)) {
 		dev_err(&ccw_device->dev,
-			"Setup of data structures failed.\n");
+			"Setting up data structures for the "
+			"FCP adapter failed\n");
 		retval = -EINVAL;
 	}
 	up(&zfcp_data.config_sema);
@@ -46,6 +47,8 @@
 	struct zfcp_adapter *adapter;
 	struct zfcp_port *port, *p;
 	struct zfcp_unit *unit, *u;
+	LIST_HEAD(unit_remove_lh);
+	LIST_HEAD(port_remove_lh);
 
 	ccw_device_set_offline(ccw_device);
 	down(&zfcp_data.config_sema);
@@ -54,26 +57,26 @@
 	write_lock_irq(&zfcp_data.config_lock);
 	list_for_each_entry_safe(port, p, &adapter->port_list_head, list) {
 		list_for_each_entry_safe(unit, u, &port->unit_list_head, list) {
-			list_move(&unit->list, &port->unit_remove_lh);
+			list_move(&unit->list, &unit_remove_lh);
 			atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE,
 					&unit->status);
 		}
-		list_move(&port->list, &adapter->port_remove_lh);
+		list_move(&port->list, &port_remove_lh);
 		atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &port->status);
 	}
 	atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status);
 	write_unlock_irq(&zfcp_data.config_lock);
 
-	list_for_each_entry_safe(port, p, &adapter->port_remove_lh, list) {
-		list_for_each_entry_safe(unit, u, &port->unit_remove_lh, list) {
-			if (atomic_test_mask(ZFCP_STATUS_UNIT_REGISTERED,
-				&unit->status))
+	list_for_each_entry_safe(port, p, &port_remove_lh, list) {
+		list_for_each_entry_safe(unit, u, &unit_remove_lh, list) {
+			if (atomic_read(&unit->status) &
+			    ZFCP_STATUS_UNIT_REGISTERED)
 				scsi_remove_device(unit->device);
 			zfcp_unit_dequeue(unit);
 		}
 		zfcp_port_dequeue(port);
 	}
-	zfcp_adapter_wait(adapter);
+	wait_event(adapter->remove_wq, atomic_read(&adapter->refcount) == 0);
 	zfcp_adapter_dequeue(adapter);
 
 	up(&zfcp_data.config_sema);
@@ -156,15 +159,18 @@
 
 	switch (event) {
 	case CIO_GONE:
-		dev_warn(&adapter->ccw_device->dev, "device gone\n");
+		dev_warn(&adapter->ccw_device->dev,
+			 "The FCP device has been detached\n");
 		zfcp_erp_adapter_shutdown(adapter, 0, 87, NULL);
 		break;
 	case CIO_NO_PATH:
-		dev_warn(&adapter->ccw_device->dev, "no path\n");
+		dev_warn(&adapter->ccw_device->dev,
+			 "The CHPID for the FCP device is offline\n");
 		zfcp_erp_adapter_shutdown(adapter, 0, 88, NULL);
 		break;
 	case CIO_OPER:
-		dev_info(&adapter->ccw_device->dev, "operational again\n");
+		dev_info(&adapter->ccw_device->dev,
+			 "The FCP device is operational again\n");
 		zfcp_erp_modify_adapter_status(adapter, 11, NULL,
 					       ZFCP_STATUS_COMMON_RUNNING,
 					       ZFCP_SET);
@@ -220,3 +226,20 @@
 {
 	return ccw_driver_register(&zfcp_ccw_driver);
 }
+
+/**
+ * zfcp_get_adapter_by_busid - find zfcp_adapter struct
+ * @busid: bus id string of zfcp adapter to find
+ */
+struct zfcp_adapter *zfcp_get_adapter_by_busid(char *busid)
+{
+	struct ccw_device *ccw_device;
+	struct zfcp_adapter *adapter = NULL;
+
+	ccw_device = get_ccwdev_by_busid(&zfcp_ccw_driver, busid);
+	if (ccw_device) {
+		adapter = dev_get_drvdata(&ccw_device->dev);
+		put_device(&ccw_device->dev);
+	}
+	return adapter;
+}
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
index fca48b8..060f5f2 100644
--- a/drivers/s390/scsi/zfcp_dbf.c
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -318,6 +318,26 @@
 	spin_unlock_irqrestore(&adapter->hba_dbf_lock, flags);
 }
 
+/**
+ * zfcp_hba_dbf_event_berr - trace event for bit error threshold
+ * @adapter: adapter affected by this QDIO related event
+ * @req: fsf request
+ */
+void zfcp_hba_dbf_event_berr(struct zfcp_adapter *adapter,
+			     struct zfcp_fsf_req *req)
+{
+	struct zfcp_hba_dbf_record *r = &adapter->hba_dbf_buf;
+	struct fsf_status_read_buffer *sr_buf = req->data;
+	struct fsf_bit_error_payload *err = &sr_buf->payload.bit_error;
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->hba_dbf_lock, flags);
+	memset(r, 0, sizeof(*r));
+	strncpy(r->tag, "berr", ZFCP_DBF_TAG_SIZE);
+	memcpy(&r->u.berr, err, sizeof(struct fsf_bit_error_payload));
+	debug_event(adapter->hba_dbf, 0, r, sizeof(*r));
+	spin_unlock_irqrestore(&adapter->hba_dbf_lock, flags);
+}
 static void zfcp_hba_dbf_view_response(char **p,
 				       struct zfcp_hba_dbf_record_response *r)
 {
@@ -399,6 +419,30 @@
 	zfcp_dbf_out(p, "sbal_count", "0x%02x", r->sbal_count);
 }
 
+static void zfcp_hba_dbf_view_berr(char **p, struct fsf_bit_error_payload *r)
+{
+	zfcp_dbf_out(p, "link_failures", "%d", r->link_failure_error_count);
+	zfcp_dbf_out(p, "loss_of_sync_err", "%d", r->loss_of_sync_error_count);
+	zfcp_dbf_out(p, "loss_of_sig_err", "%d", r->loss_of_signal_error_count);
+	zfcp_dbf_out(p, "prim_seq_err", "%d",
+		     r->primitive_sequence_error_count);
+	zfcp_dbf_out(p, "inval_trans_word_err", "%d",
+		     r->invalid_transmission_word_error_count);
+	zfcp_dbf_out(p, "CRC_errors", "%d", r->crc_error_count);
+	zfcp_dbf_out(p, "prim_seq_event_to", "%d",
+		     r->primitive_sequence_event_timeout_count);
+	zfcp_dbf_out(p, "elast_buf_overrun_err", "%d",
+		     r->elastic_buffer_overrun_error_count);
+	zfcp_dbf_out(p, "adv_rec_buf2buf_cred", "%d",
+		     r->advertised_receive_b2b_credit);
+	zfcp_dbf_out(p, "curr_rec_buf2buf_cred", "%d",
+		     r->current_receive_b2b_credit);
+	zfcp_dbf_out(p, "adv_trans_buf2buf_cred", "%d",
+		     r->advertised_transmit_b2b_credit);
+	zfcp_dbf_out(p, "curr_trans_buf2buf_cred", "%d",
+		     r->current_transmit_b2b_credit);
+}
+
 static int zfcp_hba_dbf_view_format(debug_info_t *id, struct debug_view *view,
 				    char *out_buf, const char *in_buf)
 {
@@ -418,6 +462,8 @@
 		zfcp_hba_dbf_view_status(&p, &r->u.status);
 	else if (strncmp(r->tag, "qdio", ZFCP_DBF_TAG_SIZE) == 0)
 		zfcp_hba_dbf_view_qdio(&p, &r->u.qdio);
+	else if (strncmp(r->tag, "berr", ZFCP_DBF_TAG_SIZE) == 0)
+		zfcp_hba_dbf_view_berr(&p, &r->u.berr);
 
 	p += sprintf(p, "\n");
 	return p - out_buf;
@@ -519,14 +565,14 @@
 	[75]	= "physical port recovery escalation after failed port "
 		  "recovery",
 	[76]	= "port recovery escalation after failed unit recovery",
-	[77]	= "recovery opening nameserver port",
+	[77]	= "",
 	[78]	= "duplicate request id",
 	[79]	= "link down",
 	[80]	= "exclusive read-only unit access unsupported",
 	[81]	= "shared read-write unit access unsupported",
 	[82]	= "incoming rscn",
 	[83]	= "incoming wwpn",
-	[84]	= "",
+	[84]	= "wka port handle not valid close port",
 	[85]	= "online",
 	[86]	= "offline",
 	[87]	= "ccw device gone",
@@ -570,7 +616,7 @@
 	[125]	= "need newer zfcp",
 	[126]	= "need newer microcode",
 	[127]	= "arbitrated loop not supported",
-	[128]	= "unknown topology",
+	[128]	= "",
 	[129]	= "qtcb size mismatch",
 	[130]	= "unknown fsf status ecd",
 	[131]	= "fcp request too big",
@@ -829,9 +875,9 @@
 void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *fsf_req)
 {
 	struct zfcp_send_ct *ct = (struct zfcp_send_ct *)fsf_req->data;
-	struct zfcp_port *port = ct->port;
-	struct zfcp_adapter *adapter = port->adapter;
-	struct ct_hdr *hdr = zfcp_sg_to_address(ct->req);
+	struct zfcp_wka_port *wka_port = ct->wka_port;
+	struct zfcp_adapter *adapter = wka_port->adapter;
+	struct ct_hdr *hdr = sg_virt(ct->req);
 	struct zfcp_san_dbf_record *r = &adapter->san_dbf_buf;
 	struct zfcp_san_dbf_record_ct_request *oct = &r->u.ct_req;
 	unsigned long flags;
@@ -842,7 +888,7 @@
 	r->fsf_reqid = (unsigned long)fsf_req;
 	r->fsf_seqno = fsf_req->seq_no;
 	r->s_id = fc_host_port_id(adapter->scsi_host);
-	r->d_id = port->d_id;
+	r->d_id = wka_port->d_id;
 	oct->cmd_req_code = hdr->cmd_rsp_code;
 	oct->revision = hdr->revision;
 	oct->gs_type = hdr->gs_type;
@@ -863,9 +909,9 @@
 void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *fsf_req)
 {
 	struct zfcp_send_ct *ct = (struct zfcp_send_ct *)fsf_req->data;
-	struct zfcp_port *port = ct->port;
-	struct zfcp_adapter *adapter = port->adapter;
-	struct ct_hdr *hdr = zfcp_sg_to_address(ct->resp);
+	struct zfcp_wka_port *wka_port = ct->wka_port;
+	struct zfcp_adapter *adapter = wka_port->adapter;
+	struct ct_hdr *hdr = sg_virt(ct->resp);
 	struct zfcp_san_dbf_record *r = &adapter->san_dbf_buf;
 	struct zfcp_san_dbf_record_ct_response *rct = &r->u.ct_resp;
 	unsigned long flags;
@@ -875,7 +921,7 @@
 	strncpy(r->tag, "rctc", ZFCP_DBF_TAG_SIZE);
 	r->fsf_reqid = (unsigned long)fsf_req;
 	r->fsf_seqno = fsf_req->seq_no;
-	r->s_id = port->d_id;
+	r->s_id = wka_port->d_id;
 	r->d_id = fc_host_port_id(adapter->scsi_host);
 	rct->cmd_rsp_code = hdr->cmd_rsp_code;
 	rct->revision = hdr->revision;
@@ -922,8 +968,8 @@
 
 	zfcp_san_dbf_event_els("oels", 2, fsf_req,
 			       fc_host_port_id(els->adapter->scsi_host),
-			       els->d_id, *(u8 *) zfcp_sg_to_address(els->req),
-			       zfcp_sg_to_address(els->req), els->req->length);
+			       els->d_id, *(u8 *) sg_virt(els->req),
+			       sg_virt(els->req), els->req->length);
 }
 
 /**
@@ -936,8 +982,7 @@
 
 	zfcp_san_dbf_event_els("rels", 2, fsf_req, els->d_id,
 			       fc_host_port_id(els->adapter->scsi_host),
-			       *(u8 *)zfcp_sg_to_address(els->req),
-			       zfcp_sg_to_address(els->resp),
+			       *(u8 *)sg_virt(els->req), sg_virt(els->resp),
 			       els->resp->length);
 }
 
diff --git a/drivers/s390/scsi/zfcp_dbf.h b/drivers/s390/scsi/zfcp_dbf.h
index 0ddb184..e8f4508 100644
--- a/drivers/s390/scsi/zfcp_dbf.h
+++ b/drivers/s390/scsi/zfcp_dbf.h
@@ -151,6 +151,7 @@
 		struct zfcp_hba_dbf_record_response response;
 		struct zfcp_hba_dbf_record_status status;
 		struct zfcp_hba_dbf_record_qdio qdio;
+		struct fsf_bit_error_payload berr;
 	} u;
 } __attribute__ ((packed));
 
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 67f45fc..73eb415 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -39,29 +39,6 @@
 
 /********************* GENERAL DEFINES *********************************/
 
-/**
- * zfcp_sg_to_address - determine kernel address from struct scatterlist
- * @list: struct scatterlist
- * Return: kernel address
- */
-static inline void *
-zfcp_sg_to_address(struct scatterlist *list)
-{
-	return sg_virt(list);
-}
-
-/**
- * zfcp_address_to_sg - set up struct scatterlist from kernel address
- * @address: kernel address
- * @list: struct scatterlist
- * @size: buffer size
- */
-static inline void
-zfcp_address_to_sg(void *address, struct scatterlist *list, unsigned int size)
-{
-	sg_set_buf(list, address, size);
-}
-
 #define REQUEST_LIST_SIZE 128
 
 /********************* SCSI SPECIFIC DEFINES *********************************/
@@ -101,11 +78,6 @@
 
 /*************** FIBRE CHANNEL PROTOCOL SPECIFIC DEFINES ********************/
 
-typedef unsigned long long wwn_t;
-typedef unsigned long long fcp_lun_t;
-/* data length field may be at variable position in FCP-2 FCP_CMND IU */
-typedef unsigned int       fcp_dl_t;
-
 /* timeout for name-server lookup (in seconds) */
 #define ZFCP_NS_GID_PN_TIMEOUT		10
 
@@ -129,7 +101,7 @@
 
 /* FCP(-2) FCP_CMND IU */
 struct fcp_cmnd_iu {
-	fcp_lun_t fcp_lun;	   /* FCP logical unit number */
+	u64 fcp_lun;	   /* FCP logical unit number */
 	u8  crn;	           /* command reference number */
 	u8  reserved0:5;	   /* reserved */
 	u8  task_attribute:3;	   /* task attribute */
@@ -204,7 +176,7 @@
 struct fcp_logo {
         u32 command;
         u32 nport_did;
-        wwn_t nport_wwpn;
+	u64 nport_wwpn;
 } __attribute__((packed));
 
 /*
@@ -218,13 +190,6 @@
 #define ZFCP_LS_RSCN			0x61
 #define ZFCP_LS_RNID			0x78
 
-struct zfcp_ls_rjt_par {
-	u8 action;
- 	u8 reason_code;
- 	u8 reason_expl;
- 	u8 vendor_unique;
-} __attribute__ ((packed));
-
 struct zfcp_ls_adisc {
 	u8		code;
 	u8		field[3];
@@ -234,20 +199,6 @@
 	u32		nport_id;
 } __attribute__ ((packed));
 
-struct zfcp_ls_adisc_acc {
-	u8		code;
-	u8		field[3];
-	u32		hard_nport_id;
-	u64		wwpn;
-	u64		wwnn;
-	u32		nport_id;
-} __attribute__ ((packed));
-
-struct zfcp_rc_entry {
-	u8 code;
-	const char *description;
-};
-
 /*
  * FC-GS-2 stuff
  */
@@ -281,9 +232,7 @@
 #define ZFCP_STATUS_COMMON_RUNNING		0x40000000
 #define ZFCP_STATUS_COMMON_ERP_FAILED		0x20000000
 #define ZFCP_STATUS_COMMON_UNBLOCKED		0x10000000
-#define ZFCP_STATUS_COMMON_OPENING              0x08000000
 #define ZFCP_STATUS_COMMON_OPEN                 0x04000000
-#define ZFCP_STATUS_COMMON_CLOSING              0x02000000
 #define ZFCP_STATUS_COMMON_ERP_INUSE		0x01000000
 #define ZFCP_STATUS_COMMON_ACCESS_DENIED	0x00800000
 #define ZFCP_STATUS_COMMON_ACCESS_BOXED		0x00400000
@@ -291,16 +240,15 @@
 
 /* adapter status */
 #define ZFCP_STATUS_ADAPTER_QDIOUP		0x00000002
-#define ZFCP_STATUS_ADAPTER_REGISTERED		0x00000004
 #define ZFCP_STATUS_ADAPTER_XCONFIG_OK		0x00000008
 #define ZFCP_STATUS_ADAPTER_HOST_CON_INIT	0x00000010
 #define ZFCP_STATUS_ADAPTER_ERP_THREAD_UP	0x00000020
 #define ZFCP_STATUS_ADAPTER_ERP_THREAD_KILL	0x00000080
 #define ZFCP_STATUS_ADAPTER_ERP_PENDING		0x00000100
 #define ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED	0x00000200
-#define ZFCP_STATUS_ADAPTER_XPORT_OK		0x00000800
 
 /* FC-PH/FC-GS well-known address identifiers for generic services */
+#define ZFCP_DID_WKA				0xFFFFF0
 #define ZFCP_DID_MANAGEMENT_SERVICE		0xFFFFFA
 #define ZFCP_DID_TIME_SERVICE			0xFFFFFB
 #define ZFCP_DID_DIRECTORY_SERVICE		0xFFFFFC
@@ -312,29 +260,27 @@
 #define ZFCP_STATUS_PORT_DID_DID		0x00000002
 #define ZFCP_STATUS_PORT_PHYS_CLOSING		0x00000004
 #define ZFCP_STATUS_PORT_NO_WWPN		0x00000008
-#define ZFCP_STATUS_PORT_NO_SCSI_ID		0x00000010
 #define ZFCP_STATUS_PORT_INVALID_WWPN		0x00000020
 
-/* for ports with well known addresses */
-#define ZFCP_STATUS_PORT_WKA \
-		(ZFCP_STATUS_PORT_NO_WWPN | \
-		 ZFCP_STATUS_PORT_NO_SCSI_ID)
+/* well known address (WKA) port status*/
+enum zfcp_wka_status {
+	ZFCP_WKA_PORT_OFFLINE,
+	ZFCP_WKA_PORT_CLOSING,
+	ZFCP_WKA_PORT_OPENING,
+	ZFCP_WKA_PORT_ONLINE,
+};
 
 /* logical unit status */
-#define ZFCP_STATUS_UNIT_TEMPORARY		0x00000002
 #define ZFCP_STATUS_UNIT_SHARED			0x00000004
 #define ZFCP_STATUS_UNIT_READONLY		0x00000008
 #define ZFCP_STATUS_UNIT_REGISTERED		0x00000010
 #define ZFCP_STATUS_UNIT_SCSI_WORK_PENDING	0x00000020
 
 /* FSF request status (this does not have a common part) */
-#define ZFCP_STATUS_FSFREQ_NOT_INIT		0x00000000
-#define ZFCP_STATUS_FSFREQ_POOL  		0x00000001
 #define ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT	0x00000002
 #define ZFCP_STATUS_FSFREQ_COMPLETED		0x00000004
 #define ZFCP_STATUS_FSFREQ_ERROR		0x00000008
 #define ZFCP_STATUS_FSFREQ_CLEANUP		0x00000010
-#define ZFCP_STATUS_FSFREQ_ABORTING		0x00000020
 #define ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED	0x00000040
 #define ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED       0x00000080
 #define ZFCP_STATUS_FSFREQ_ABORTED              0x00000100
@@ -379,7 +325,7 @@
  * a port name is required */
 struct ct_iu_gid_pn_req {
 	struct ct_hdr header;
-	wwn_t wwpn;
+	u64 wwpn;
 } __attribute__ ((packed));
 
 /* FS_ACC IU and data unit for GID_PN nameserver request */
@@ -388,11 +334,9 @@
 	u32 d_id;
 } __attribute__ ((packed));
 
-typedef void (*zfcp_send_ct_handler_t)(unsigned long);
-
 /**
  * struct zfcp_send_ct - used to pass parameters to function zfcp_fsf_send_ct
- * @port: port where the request is sent to
+ * @wka_port: port where the request is sent to
  * @req: scatter-gather list for request
  * @resp: scatter-gather list for response
  * @req_count: number of elements in request scatter-gather list
@@ -404,12 +348,12 @@
  * @status: used to pass error status to calling function
  */
 struct zfcp_send_ct {
-	struct zfcp_port *port;
+	struct zfcp_wka_port *wka_port;
 	struct scatterlist *req;
 	struct scatterlist *resp;
 	unsigned int req_count;
 	unsigned int resp_count;
-	zfcp_send_ct_handler_t handler;
+	void (*handler)(unsigned long);
 	unsigned long handler_data;
 	int timeout;
 	struct completion *completion;
@@ -426,8 +370,6 @@
         struct zfcp_port *port;
 };
 
-typedef void (*zfcp_send_els_handler_t)(unsigned long);
-
 /**
  * struct zfcp_send_els - used to pass parameters to function zfcp_fsf_send_els
  * @adapter: adapter where request is sent from
@@ -451,22 +393,28 @@
 	struct scatterlist *resp;
 	unsigned int req_count;
 	unsigned int resp_count;
-	zfcp_send_els_handler_t handler;
+	void (*handler)(unsigned long);
 	unsigned long handler_data;
 	struct completion *completion;
 	int ls_code;
 	int status;
 };
 
+struct zfcp_wka_port {
+	struct zfcp_adapter	*adapter;
+	wait_queue_head_t	completion_wq;
+	enum zfcp_wka_status	status;
+	atomic_t		refcount;
+	u32			d_id;
+	u32			handle;
+	struct mutex		mutex;
+	struct delayed_work	work;
+};
+
 struct zfcp_qdio_queue {
-	struct qdio_buffer *sbal[QDIO_MAX_BUFFERS_PER_Q]; /* SBALs */
-	u8		   first;	      /* index of next free bfr
-						 in queue (free_count>0) */
-	atomic_t           count;	      /* number of free buffers
-						 in queue */
-	spinlock_t	   lock;	      /* lock for operations on queue */
-	int                pci_batch;	      /* SBALs since PCI indication
-						 was last set */
+	struct qdio_buffer *sbal[QDIO_MAX_BUFFERS_PER_Q];
+	u8		   first;	/* index of next free bfr in queue */
+	atomic_t           count;	/* number of free buffers in queue */
 };
 
 struct zfcp_erp_action {
@@ -475,7 +423,7 @@
 	struct zfcp_adapter *adapter; /* device which should be recovered */
 	struct zfcp_port *port;
 	struct zfcp_unit *unit;
-	volatile u32 status;	      /* recovery status */
+	u32		status;	      /* recovery status */
 	u32 step;	              /* active step of this erp action */
 	struct zfcp_fsf_req *fsf_req; /* fsf request currently pending
 					 for this action */
@@ -506,8 +454,8 @@
 	atomic_t                refcount;          /* reference count */
 	wait_queue_head_t	remove_wq;         /* can be used to wait for
 						      refcount drop to zero */
-	wwn_t			peer_wwnn;	   /* P2P peer WWNN */
-	wwn_t			peer_wwpn;	   /* P2P peer WWPN */
+	u64			peer_wwnn;	   /* P2P peer WWNN */
+	u64			peer_wwpn;	   /* P2P peer WWPN */
 	u32			peer_d_id;	   /* P2P peer D_ID */
 	struct ccw_device       *ccw_device;	   /* S/390 ccw device */
 	u32			hydra_version;	   /* Hydra version */
@@ -518,13 +466,13 @@
 	u16			timer_ticks;       /* time int for a tick */
 	struct Scsi_Host	*scsi_host;	   /* Pointer to mid-layer */
 	struct list_head	port_list_head;	   /* remote port list */
-	struct list_head        port_remove_lh;    /* head of ports to be
-						      removed */
-	u32			ports;	           /* number of remote ports */
 	unsigned long		req_no;		   /* unique FSF req number */
 	struct list_head	*req_list;	   /* list of pending reqs */
 	spinlock_t		req_list_lock;	   /* request list lock */
 	struct zfcp_qdio_queue	req_q;		   /* request queue */
+	spinlock_t		req_q_lock;	   /* for operations on queue */
+	int			req_q_pci_batch;   /* SBALs since PCI indication
+						      was last set */
 	u32			fsf_req_seq_no;	   /* FSF cmnd seq number */
 	wait_queue_head_t	request_wq;	   /* can be used to wait for
 						      more avaliable SBALs */
@@ -548,7 +496,7 @@
 						      actions */
 	u32			erp_low_mem_count; /* nr of erp actions waiting
 						      for memory */
-	struct zfcp_port	*nameserver_port;  /* adapter's nameserver */
+	struct zfcp_wka_port	nsp;		   /* adapter's nameserver */
 	debug_info_t		*rec_dbf;
 	debug_info_t		*hba_dbf;
 	debug_info_t		*san_dbf;          /* debug feature areas */
@@ -563,11 +511,11 @@
 	struct zfcp_scsi_dbf_record	scsi_dbf_buf;
 	struct zfcp_adapter_mempool	pool;      /* Adapter memory pools */
 	struct qdio_initialize  qdio_init_data;    /* for qdio_establish */
-	struct device           generic_services;  /* directory for WKA ports */
 	struct fc_host_statistics *fc_stats;
 	struct fsf_qtcb_bottom_port *stats_reset_data;
 	unsigned long		stats_reset;
 	struct work_struct	scan_work;
+	atomic_t		qdio_outb_full;	   /* queue full incidents */
 };
 
 struct zfcp_port {
@@ -579,18 +527,16 @@
 						  refcount drop to zero */
 	struct zfcp_adapter    *adapter;       /* adapter used to access port */
 	struct list_head       unit_list_head; /* head of logical unit list */
-	struct list_head       unit_remove_lh; /* head of luns to be removed
-						  list */
-	u32		       units;	       /* # of logical units in list */
 	atomic_t	       status;	       /* status of this remote port */
-	wwn_t		       wwnn;	       /* WWNN if known */
-	wwn_t		       wwpn;	       /* WWPN */
+	u64		       wwnn;	       /* WWNN if known */
+	u64		       wwpn;	       /* WWPN */
 	u32		       d_id;	       /* D_ID */
 	u32		       handle;	       /* handle assigned by FSF */
 	struct zfcp_erp_action erp_action;     /* pending error recovery */
         atomic_t               erp_counter;
 	u32                    maxframe_size;
 	u32                    supported_classes;
+	struct work_struct     gid_pn_work;
 };
 
 struct zfcp_unit {
@@ -601,8 +547,7 @@
 						  refcount drop to zero */
 	struct zfcp_port       *port;	       /* remote port of unit */
 	atomic_t	       status;	       /* status of this logical unit */
-	unsigned int	       scsi_lun;       /* own SCSI LUN */
-	fcp_lun_t	       fcp_lun;	       /* own FCP_LUN */
+	u64		       fcp_lun;	       /* own FCP_LUN */
 	u32		       handle;	       /* handle assigned by FSF */
         struct scsi_device     *device;        /* scsi device struct pointer */
 	struct zfcp_erp_action erp_action;     /* pending error recovery */
@@ -625,7 +570,7 @@
 	u8			sbal_response;	/* SBAL used in interrupt */
 	wait_queue_head_t      completion_wq;  /* can be used by a routine
 						  to wait for completion */
-	volatile u32	       status;	       /* status of this request */
+	u32			status;	       /* status of this request */
 	u32		       fsf_command;    /* FSF Command copy */
 	struct fsf_qtcb	       *qtcb;	       /* address of associated QTCB */
 	u32		       seq_no;         /* Sequence number of request */
@@ -644,11 +589,7 @@
 struct zfcp_data {
 	struct scsi_host_template scsi_host_template;
 	struct scsi_transport_template *scsi_transport_template;
-        atomic_t                status;             /* Module status flags */
 	struct list_head	adapter_list_head;  /* head of adapter list */
-	struct list_head	adapter_remove_lh;  /* head of adapters to be
-						       removed */
-	u32			adapters;	    /* # of adapters in list */
 	rwlock_t                config_lock;        /* serialises changes
 						       to adapter/port/unit
 						       lists */
@@ -656,11 +597,12 @@
 						       changes */
 	atomic_t		loglevel;            /* current loglevel */
 	char                    init_busid[BUS_ID_SIZE];
-	wwn_t                   init_wwpn;
-	fcp_lun_t               init_fcp_lun;
-	struct kmem_cache		*fsf_req_qtcb_cache;
-	struct kmem_cache		*sr_buffer_cache;
-	struct kmem_cache		*gid_pn_cache;
+	u64			init_wwpn;
+	u64			init_fcp_lun;
+	struct kmem_cache	*fsf_req_qtcb_cache;
+	struct kmem_cache	*sr_buffer_cache;
+	struct kmem_cache	*gid_pn_cache;
+	struct workqueue_struct	*work_queue;
 };
 
 /* struct used by memory pools for fsf_requests */
@@ -677,14 +619,7 @@
 #define ZFCP_SET                0x00000100
 #define ZFCP_CLEAR              0x00000200
 
-#ifndef atomic_test_mask
-#define atomic_test_mask(mask, target) \
-           ((atomic_read(target) & mask) == mask)
-#endif
-
 #define zfcp_get_busid_by_adapter(adapter) (adapter->ccw_device->dev.bus_id)
-#define zfcp_get_busid_by_port(port) (zfcp_get_busid_by_adapter(port->adapter))
-#define zfcp_get_busid_by_unit(unit) (zfcp_get_busid_by_port(unit->port))
 
 /*
  * Helper functions for request ID management.
@@ -745,12 +680,6 @@
 }
 
 static inline void
-zfcp_unit_wait(struct zfcp_unit *unit)
-{
-	wait_event(unit->remove_wq, atomic_read(&unit->refcount) == 0);
-}
-
-static inline void
 zfcp_port_get(struct zfcp_port *port)
 {
 	atomic_inc(&port->refcount);
@@ -764,12 +693,6 @@
 }
 
 static inline void
-zfcp_port_wait(struct zfcp_port *port)
-{
-	wait_event(port->remove_wq, atomic_read(&port->refcount) == 0);
-}
-
-static inline void
 zfcp_adapter_get(struct zfcp_adapter *adapter)
 {
 	atomic_inc(&adapter->refcount);
@@ -782,10 +705,4 @@
 		wake_up(&adapter->remove_wq);
 }
 
-static inline void
-zfcp_adapter_wait(struct zfcp_adapter *adapter)
-{
-	wait_event(adapter->remove_wq, atomic_read(&adapter->refcount) == 0);
-}
-
 #endif /* ZFCP_DEF_H */
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 643ac4b..7823131 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -23,7 +23,6 @@
 	ZFCP_ERP_STEP_FSF_XCONFIG	= 0x0001,
 	ZFCP_ERP_STEP_PHYS_PORT_CLOSING	= 0x0010,
 	ZFCP_ERP_STEP_PORT_CLOSING	= 0x0100,
-	ZFCP_ERP_STEP_NAMESERVER_OPEN	= 0x0200,
 	ZFCP_ERP_STEP_NAMESERVER_LOOKUP	= 0x0400,
 	ZFCP_ERP_STEP_PORT_OPENING	= 0x0800,
 	ZFCP_ERP_STEP_UNIT_CLOSING	= 0x1000,
@@ -532,8 +531,7 @@
 	struct zfcp_port *port;
 
 	list_for_each_entry(port, &adapter->port_list_head, list)
-		if (!(atomic_read(&port->status) & ZFCP_STATUS_PORT_WKA))
-			_zfcp_erp_port_reopen(port, clear, id, ref);
+		_zfcp_erp_port_reopen(port, clear, id, ref);
 }
 
 static void _zfcp_erp_unit_reopen_all(struct zfcp_port *port, int clear, u8 id,
@@ -669,8 +667,6 @@
 	int ret;
 	struct zfcp_adapter *adapter = act->adapter;
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
-
 	write_lock_irq(&adapter->erp_lock);
 	zfcp_erp_action_to_running(act);
 	write_unlock_irq(&adapter->erp_lock);
@@ -741,8 +737,7 @@
 				       ZFCP_STATUS_COMMON_OPEN, ZFCP_CLEAR);
  failed_qdio:
 	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
-			  ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
-			  ZFCP_STATUS_ADAPTER_XPORT_OK,
+			  ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
 			  &act->adapter->status);
 	return retval;
 }
@@ -751,15 +746,11 @@
 {
 	int retval;
 
-	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &act->adapter->status);
 	zfcp_erp_adapter_strategy_generic(act, 1); /* close */
-	atomic_clear_mask(ZFCP_STATUS_COMMON_CLOSING, &act->adapter->status);
 	if (act->status & ZFCP_STATUS_ERP_CLOSE_ONLY)
 		return ZFCP_ERP_EXIT;
 
-	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &act->adapter->status);
 	retval = zfcp_erp_adapter_strategy_generic(act, 0); /* open */
-	atomic_clear_mask(ZFCP_STATUS_COMMON_OPENING, &act->adapter->status);
 
 	if (retval == ZFCP_ERP_FAILED)
 		ssleep(8);
@@ -783,10 +774,7 @@
 
 static void zfcp_erp_port_strategy_clearstati(struct zfcp_port *port)
 {
-	atomic_clear_mask(ZFCP_STATUS_COMMON_OPENING |
-			  ZFCP_STATUS_COMMON_CLOSING |
-			  ZFCP_STATUS_COMMON_ACCESS_DENIED |
-			  ZFCP_STATUS_PORT_DID_DID |
+	atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED |
 			  ZFCP_STATUS_PORT_PHYS_CLOSING |
 			  ZFCP_STATUS_PORT_INVALID_WWPN,
 			  &port->status);
@@ -839,73 +827,12 @@
 	return ZFCP_ERP_CONTINUES;
 }
 
-static void zfcp_erp_port_strategy_open_ns_wake(struct zfcp_erp_action *ns_act)
-{
-	unsigned long flags;
-	struct zfcp_adapter *adapter = ns_act->adapter;
-	struct zfcp_erp_action *act, *tmp;
-	int status;
-
-	read_lock_irqsave(&adapter->erp_lock, flags);
-	list_for_each_entry_safe(act, tmp, &adapter->erp_running_head, list) {
-		if (act->step == ZFCP_ERP_STEP_NAMESERVER_OPEN) {
-			status = atomic_read(&adapter->nameserver_port->status);
-			if (status & ZFCP_STATUS_COMMON_ERP_FAILED)
-				zfcp_erp_port_failed(act->port, 27, NULL);
-			zfcp_erp_action_ready(act);
-		}
-	}
-	read_unlock_irqrestore(&adapter->erp_lock, flags);
-}
-
-static int zfcp_erp_port_strategy_open_nameserver(struct zfcp_erp_action *act)
-{
-	int retval;
-
-	switch (act->step) {
-	case ZFCP_ERP_STEP_UNINITIALIZED:
-	case ZFCP_ERP_STEP_PHYS_PORT_CLOSING:
-	case ZFCP_ERP_STEP_PORT_CLOSING:
-		return zfcp_erp_port_strategy_open_port(act);
-
-	case ZFCP_ERP_STEP_PORT_OPENING:
-		if (atomic_read(&act->port->status) & ZFCP_STATUS_COMMON_OPEN)
-			retval = ZFCP_ERP_SUCCEEDED;
-		else
-			retval = ZFCP_ERP_FAILED;
-		/* this is needed anyway  */
-		zfcp_erp_port_strategy_open_ns_wake(act);
-		return retval;
-
-	default:
-		return ZFCP_ERP_FAILED;
-	}
-}
-
-static int zfcp_erp_port_strategy_open_lookup(struct zfcp_erp_action *act)
-{
-	int retval;
-
-	retval = zfcp_fc_ns_gid_pn_request(act);
-	if (retval == -ENOMEM)
-		return ZFCP_ERP_NOMEM;
-	act->step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP;
-	if (retval)
-		return ZFCP_ERP_FAILED;
-	return ZFCP_ERP_CONTINUES;
-}
-
 static int zfcp_erp_open_ptp_port(struct zfcp_erp_action *act)
 {
 	struct zfcp_adapter *adapter = act->adapter;
 	struct zfcp_port *port = act->port;
 
 	if (port->wwpn != adapter->peer_wwpn) {
-		dev_err(&adapter->ccw_device->dev,
-			"Failed to open port 0x%016Lx, "
-			"Peer WWPN 0x%016Lx does not "
-			"match.\n", port->wwpn,
-			adapter->peer_wwpn);
 		zfcp_erp_port_failed(port, 25, NULL);
 		return ZFCP_ERP_FAILED;
 	}
@@ -914,11 +841,25 @@
 	return zfcp_erp_port_strategy_open_port(act);
 }
 
+void zfcp_erp_port_strategy_open_lookup(struct work_struct *work)
+{
+	int retval;
+	struct zfcp_port *port = container_of(work, struct zfcp_port,
+					      gid_pn_work);
+
+	retval = zfcp_fc_ns_gid_pn(&port->erp_action);
+	if (retval == -ENOMEM)
+		zfcp_erp_notify(&port->erp_action, ZFCP_ERP_NOMEM);
+	port->erp_action.step = ZFCP_ERP_STEP_NAMESERVER_LOOKUP;
+	if (retval)
+		zfcp_erp_notify(&port->erp_action, ZFCP_ERP_FAILED);
+
+}
+
 static int zfcp_erp_port_strategy_open_common(struct zfcp_erp_action *act)
 {
 	struct zfcp_adapter *adapter = act->adapter;
 	struct zfcp_port *port = act->port;
-	struct zfcp_port *ns_port = adapter->nameserver_port;
 	int p_status = atomic_read(&port->status);
 
 	switch (act->step) {
@@ -927,28 +868,10 @@
 	case ZFCP_ERP_STEP_PORT_CLOSING:
 		if (fc_host_port_type(adapter->scsi_host) == FC_PORTTYPE_PTP)
 			return zfcp_erp_open_ptp_port(act);
-		if (!ns_port) {
-			dev_err(&adapter->ccw_device->dev,
-				"Nameserver port unavailable.\n");
-			return ZFCP_ERP_FAILED;
+		if (!(p_status & ZFCP_STATUS_PORT_DID_DID)) {
+			queue_work(zfcp_data.work_queue, &port->gid_pn_work);
+			return ZFCP_ERP_CONTINUES;
 		}
-		if (!(atomic_read(&ns_port->status) &
-		      ZFCP_STATUS_COMMON_UNBLOCKED)) {
-			/* nameserver port may live again */
-			atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING,
-					&ns_port->status);
-			if (zfcp_erp_port_reopen(ns_port, 0, 77, act) >= 0) {
-				act->step = ZFCP_ERP_STEP_NAMESERVER_OPEN;
-				return ZFCP_ERP_CONTINUES;
-			}
-			return ZFCP_ERP_FAILED;
-		}
-		/* else nameserver port is already open, fall through */
-	case ZFCP_ERP_STEP_NAMESERVER_OPEN:
-		if (!(atomic_read(&ns_port->status) & ZFCP_STATUS_COMMON_OPEN))
-			return ZFCP_ERP_FAILED;
-		return zfcp_erp_port_strategy_open_lookup(act);
-
 	case ZFCP_ERP_STEP_NAMESERVER_LOOKUP:
 		if (!(p_status & ZFCP_STATUS_PORT_DID_DID)) {
 			if (p_status & (ZFCP_STATUS_PORT_INVALID_WWPN)) {
@@ -961,25 +884,26 @@
 
 	case ZFCP_ERP_STEP_PORT_OPENING:
 		/* D_ID might have changed during open */
-		if ((p_status & ZFCP_STATUS_COMMON_OPEN) &&
-		    (p_status & ZFCP_STATUS_PORT_DID_DID))
-			return ZFCP_ERP_SUCCEEDED;
+		if (p_status & ZFCP_STATUS_COMMON_OPEN) {
+			if (p_status & ZFCP_STATUS_PORT_DID_DID)
+				return ZFCP_ERP_SUCCEEDED;
+			else {
+				act->step = ZFCP_ERP_STEP_PORT_CLOSING;
+				return ZFCP_ERP_CONTINUES;
+			}
 		/* fall through otherwise */
+		}
 	}
 	return ZFCP_ERP_FAILED;
 }
 
-static int zfcp_erp_port_strategy_open(struct zfcp_erp_action *act)
-{
-	if (atomic_read(&act->port->status) & (ZFCP_STATUS_PORT_WKA))
-		return zfcp_erp_port_strategy_open_nameserver(act);
-	return zfcp_erp_port_strategy_open_common(act);
-}
-
 static int zfcp_erp_port_strategy(struct zfcp_erp_action *erp_action)
 {
 	struct zfcp_port *port = erp_action->port;
 
+	if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_NOESC)
+		goto close_init_done;
+
 	switch (erp_action->step) {
 	case ZFCP_ERP_STEP_UNINITIALIZED:
 		zfcp_erp_port_strategy_clearstati(port);
@@ -992,19 +916,17 @@
 			return ZFCP_ERP_FAILED;
 		break;
 	}
+
+close_init_done:
 	if (erp_action->status & ZFCP_STATUS_ERP_CLOSE_ONLY)
 		return ZFCP_ERP_EXIT;
-	else
-		return zfcp_erp_port_strategy_open(erp_action);
 
-	return ZFCP_ERP_FAILED;
+	return zfcp_erp_port_strategy_open_common(erp_action);
 }
 
 static void zfcp_erp_unit_strategy_clearstati(struct zfcp_unit *unit)
 {
-	atomic_clear_mask(ZFCP_STATUS_COMMON_OPENING |
-			  ZFCP_STATUS_COMMON_CLOSING |
-			  ZFCP_STATUS_COMMON_ACCESS_DENIED |
+	atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED |
 			  ZFCP_STATUS_UNIT_SHARED |
 			  ZFCP_STATUS_UNIT_READONLY,
 			  &unit->status);
@@ -1065,8 +987,14 @@
 		break;
 	case ZFCP_ERP_FAILED :
 		atomic_inc(&unit->erp_counter);
-		if (atomic_read(&unit->erp_counter) > ZFCP_MAX_ERPS)
+		if (atomic_read(&unit->erp_counter) > ZFCP_MAX_ERPS) {
+			dev_err(&unit->port->adapter->ccw_device->dev,
+				"ERP failed for unit 0x%016Lx on "
+				"port 0x%016Lx\n",
+				(unsigned long long)unit->fcp_lun,
+				(unsigned long long)unit->port->wwpn);
 			zfcp_erp_unit_failed(unit, 21, NULL);
+		}
 		break;
 	}
 
@@ -1091,8 +1019,12 @@
 			result = ZFCP_ERP_EXIT;
 		}
 		atomic_inc(&port->erp_counter);
-		if (atomic_read(&port->erp_counter) > ZFCP_MAX_ERPS)
+		if (atomic_read(&port->erp_counter) > ZFCP_MAX_ERPS) {
+			dev_err(&port->adapter->ccw_device->dev,
+				"ERP failed for remote port 0x%016Lx\n",
+				(unsigned long long)port->wwpn);
 			zfcp_erp_port_failed(port, 22, NULL);
+		}
 		break;
 	}
 
@@ -1114,8 +1046,12 @@
 
 	case ZFCP_ERP_FAILED :
 		atomic_inc(&adapter->erp_counter);
-		if (atomic_read(&adapter->erp_counter) > ZFCP_MAX_ERPS)
+		if (atomic_read(&adapter->erp_counter) > ZFCP_MAX_ERPS) {
+			dev_err(&adapter->ccw_device->dev,
+				"ERP cannot recover an error "
+				"on the FCP device\n");
 			zfcp_erp_adapter_failed(adapter, 23, NULL);
+		}
 		break;
 	}
 
@@ -1250,9 +1186,10 @@
 	struct zfcp_unit *unit = p->unit;
 	struct fc_rport *rport = unit->port->rport;
 	scsi_scan_target(&rport->dev, 0, rport->scsi_target_id,
-			 unit->scsi_lun, 0);
+			 scsilun_to_int((struct scsi_lun *)&unit->fcp_lun), 0);
 	atomic_clear_mask(ZFCP_STATUS_UNIT_SCSI_WORK_PENDING, &unit->status);
 	zfcp_unit_put(unit);
+	wake_up(&unit->port->adapter->erp_done_wqh);
 	kfree(p);
 }
 
@@ -1263,9 +1200,9 @@
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p) {
 		dev_err(&unit->port->adapter->ccw_device->dev,
-			"Out of resources. Could not register unit "
-			"0x%016Lx on port 0x%016Lx with SCSI stack.\n",
-			unit->fcp_lun, unit->port->wwpn);
+			"Registering unit 0x%016Lx on port 0x%016Lx failed\n",
+			(unsigned long long)unit->fcp_lun,
+			(unsigned long long)unit->port->wwpn);
 		return;
 	}
 
@@ -1273,7 +1210,7 @@
 	atomic_set_mask(ZFCP_STATUS_UNIT_SCSI_WORK_PENDING, &unit->status);
 	INIT_WORK(&p->work, zfcp_erp_scsi_scan);
 	p->unit = unit;
-	schedule_work(&p->work);
+	queue_work(zfcp_data.work_queue, &p->work);
 }
 
 static void zfcp_erp_rport_register(struct zfcp_port *port)
@@ -1286,8 +1223,8 @@
 	port->rport = fc_remote_port_add(port->adapter->scsi_host, 0, &ids);
 	if (!port->rport) {
 		dev_err(&port->adapter->ccw_device->dev,
-			"Failed registration of rport "
-			"0x%016Lx.\n", port->wwpn);
+			"Registering port 0x%016Lx failed\n",
+			(unsigned long long)port->wwpn);
 		return;
 	}
 
@@ -1299,12 +1236,12 @@
 static void zfcp_erp_rports_del(struct zfcp_adapter *adapter)
 {
 	struct zfcp_port *port;
-	list_for_each_entry(port, &adapter->port_list_head, list)
-		if (port->rport && !(atomic_read(&port->status) &
-					ZFCP_STATUS_PORT_WKA)) {
-			fc_remote_port_delete(port->rport);
-			port->rport = NULL;
-		}
+	list_for_each_entry(port, &adapter->port_list_head, list) {
+		if (!port->rport)
+			continue;
+		fc_remote_port_delete(port->rport);
+		port->rport = NULL;
+	}
 }
 
 static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result)
@@ -1459,9 +1396,9 @@
 				zfcp_erp_wakeup(adapter);
 		}
 
-		zfcp_rec_dbf_event_thread(4, adapter);
+		zfcp_rec_dbf_event_thread_lock(4, adapter);
 		down_interruptible(&adapter->erp_ready_sem);
-		zfcp_rec_dbf_event_thread(5, adapter);
+		zfcp_rec_dbf_event_thread_lock(5, adapter);
 	}
 
 	atomic_clear_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_UP, &adapter->status);
@@ -1484,7 +1421,7 @@
 	retval = kernel_thread(zfcp_erp_thread, adapter, SIGCHLD);
 	if (retval < 0) {
 		dev_err(&adapter->ccw_device->dev,
-			"Creation of ERP thread failed.\n");
+			"Creating an ERP thread for the FCP device failed.\n");
 		return retval;
 	}
 	wait_event(adapter->erp_thread_wqh,
@@ -1506,7 +1443,7 @@
 {
 	atomic_set_mask(ZFCP_STATUS_ADAPTER_ERP_THREAD_KILL, &adapter->status);
 	up(&adapter->erp_ready_sem);
-	zfcp_rec_dbf_event_thread_lock(2, adapter);
+	zfcp_rec_dbf_event_thread_lock(3, adapter);
 
 	wait_event(adapter->erp_thread_wqh,
 		   !(atomic_read(&adapter->status) &
@@ -1526,7 +1463,6 @@
 {
 	zfcp_erp_modify_adapter_status(adapter, id, ref,
 				       ZFCP_STATUS_COMMON_ERP_FAILED, ZFCP_SET);
-	dev_err(&adapter->ccw_device->dev, "Adapter ERP failed.\n");
 }
 
 /**
@@ -1539,15 +1475,6 @@
 {
 	zfcp_erp_modify_port_status(port, id, ref,
 				    ZFCP_STATUS_COMMON_ERP_FAILED, ZFCP_SET);
-
-	if (atomic_read(&port->status) & ZFCP_STATUS_PORT_WKA)
-		dev_err(&port->adapter->ccw_device->dev,
-			"Port ERP failed for WKA port d_id=0x%06x.\n",
-			port->d_id);
-	else
-		dev_err(&port->adapter->ccw_device->dev,
-			"Port ERP failed for port wwpn=0x%016Lx.\n",
-			port->wwpn);
 }
 
 /**
@@ -1560,10 +1487,6 @@
 {
 	zfcp_erp_modify_unit_status(unit, id, ref,
 				    ZFCP_STATUS_COMMON_ERP_FAILED, ZFCP_SET);
-
-	dev_err(&unit->port->adapter->ccw_device->dev,
-		"Unit ERP failed for unit 0x%016Lx on port 0x%016Lx.\n",
-		unit->fcp_lun, unit->port->wwpn);
 }
 
 /**
@@ -1754,9 +1677,8 @@
 
 	if (!(status & (ZFCP_STATUS_COMMON_ACCESS_DENIED |
 			ZFCP_STATUS_COMMON_ACCESS_BOXED))) {
-		if (!(status & ZFCP_STATUS_PORT_WKA))
-			list_for_each_entry(unit, &port->unit_list_head, list)
-				zfcp_erp_unit_access_changed(unit, id, ref);
+		list_for_each_entry(unit, &port->unit_list_head, list)
+				    zfcp_erp_unit_access_changed(unit, id, ref);
 		return;
 	}
 
@@ -1779,10 +1701,7 @@
 		return;
 
 	read_lock_irqsave(&zfcp_data.config_lock, flags);
-	if (adapter->nameserver_port)
-		zfcp_erp_port_access_changed(adapter->nameserver_port, id, ref);
 	list_for_each_entry(port, &adapter->port_list_head, list)
-		if (port != adapter->nameserver_port)
-			zfcp_erp_port_access_changed(port, id, ref);
+		zfcp_erp_port_access_changed(port, id, ref);
 	read_unlock_irqrestore(&zfcp_data.config_lock, flags);
 }
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index edfdb21..b5adeda 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -12,16 +12,14 @@
 #include "zfcp_def.h"
 
 /* zfcp_aux.c */
-extern struct zfcp_unit *zfcp_get_unit_by_lun(struct zfcp_port *,
-					      fcp_lun_t);
-extern struct zfcp_port *zfcp_get_port_by_wwpn(struct zfcp_adapter *,
-					       wwn_t);
+extern struct zfcp_unit *zfcp_get_unit_by_lun(struct zfcp_port *, u64);
+extern struct zfcp_port *zfcp_get_port_by_wwpn(struct zfcp_adapter *, u64);
 extern int zfcp_adapter_enqueue(struct ccw_device *);
 extern void zfcp_adapter_dequeue(struct zfcp_adapter *);
-extern struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *, wwn_t, u32,
+extern struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *, u64, u32,
 					   u32);
 extern void zfcp_port_dequeue(struct zfcp_port *);
-extern struct zfcp_unit *zfcp_unit_enqueue(struct zfcp_port *, fcp_lun_t);
+extern struct zfcp_unit *zfcp_unit_enqueue(struct zfcp_port *, u64);
 extern void zfcp_unit_dequeue(struct zfcp_unit *);
 extern int zfcp_reqlist_isempty(struct zfcp_adapter *);
 extern void zfcp_sg_free_table(struct scatterlist *, int);
@@ -29,6 +27,7 @@
 
 /* zfcp_ccw.c */
 extern int zfcp_ccw_register(void);
+extern struct zfcp_adapter *zfcp_get_adapter_by_busid(char *);
 
 /* zfcp_cfdc.c */
 extern struct miscdevice zfcp_cfdc_misc;
@@ -50,6 +49,8 @@
 					 struct fsf_status_read_buffer *);
 extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *, unsigned int, int,
 				    int);
+extern void zfcp_hba_dbf_event_berr(struct zfcp_adapter *,
+				    struct zfcp_fsf_req *);
 extern void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *);
 extern void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *);
 extern void zfcp_san_dbf_event_els_request(struct zfcp_fsf_req *);
@@ -91,17 +92,21 @@
 extern void zfcp_erp_unit_access_denied(struct zfcp_unit *, u8, void *);
 extern void zfcp_erp_adapter_access_changed(struct zfcp_adapter *, u8, void *);
 extern void zfcp_erp_timeout_handler(unsigned long);
+extern void zfcp_erp_port_strategy_open_lookup(struct work_struct *);
 
 /* zfcp_fc.c */
 extern int zfcp_scan_ports(struct zfcp_adapter *);
 extern void _zfcp_scan_ports_later(struct work_struct *);
 extern void zfcp_fc_incoming_els(struct zfcp_fsf_req *);
-extern int zfcp_fc_ns_gid_pn_request(struct zfcp_erp_action *);
+extern int zfcp_fc_ns_gid_pn(struct zfcp_erp_action *);
 extern void zfcp_fc_plogi_evaluate(struct zfcp_port *, struct fsf_plogi *);
 extern void zfcp_test_link(struct zfcp_port *);
+extern void zfcp_fc_nameserver_init(struct zfcp_adapter *);
 
 /* zfcp_fsf.c */
 extern int zfcp_fsf_open_port(struct zfcp_erp_action *);
+extern int zfcp_fsf_open_wka_port(struct zfcp_wka_port *);
+extern int zfcp_fsf_close_wka_port(struct zfcp_wka_port *);
 extern int zfcp_fsf_close_port(struct zfcp_erp_action *);
 extern int zfcp_fsf_close_physical_port(struct zfcp_erp_action *);
 extern int zfcp_fsf_open_unit(struct zfcp_erp_action *);
@@ -135,10 +140,8 @@
 extern int zfcp_qdio_allocate(struct zfcp_adapter *);
 extern void zfcp_qdio_free(struct zfcp_adapter *);
 extern int zfcp_qdio_send(struct zfcp_fsf_req *);
-extern volatile struct qdio_buffer_element *zfcp_qdio_sbale_req(
-						struct zfcp_fsf_req *);
-extern volatile struct qdio_buffer_element *zfcp_qdio_sbale_curr(
-						struct zfcp_fsf_req *);
+extern struct qdio_buffer_element *zfcp_qdio_sbale_req(struct zfcp_fsf_req *);
+extern struct qdio_buffer_element *zfcp_qdio_sbale_curr(struct zfcp_fsf_req *);
 extern int zfcp_qdio_sbals_from_sg(struct zfcp_fsf_req *, unsigned long,
 				   struct scatterlist *, int);
 extern int zfcp_qdio_open(struct zfcp_adapter *);
@@ -148,14 +151,12 @@
 extern struct zfcp_data zfcp_data;
 extern int zfcp_adapter_scsi_register(struct zfcp_adapter *);
 extern void zfcp_adapter_scsi_unregister(struct zfcp_adapter *);
-extern void zfcp_set_fcp_dl(struct fcp_cmnd_iu *, fcp_dl_t);
 extern char *zfcp_get_fcp_sns_info_ptr(struct fcp_rsp_iu *);
 extern struct fc_function_template zfcp_transport_functions;
 
 /* zfcp_sysfs.c */
 extern struct attribute_group zfcp_sysfs_unit_attrs;
 extern struct attribute_group zfcp_sysfs_adapter_attrs;
-extern struct attribute_group zfcp_sysfs_ns_port_attrs;
 extern struct attribute_group zfcp_sysfs_port_attrs;
 extern struct device_attribute *zfcp_sysfs_sdev_attrs[];
 extern struct device_attribute *zfcp_sysfs_shost_attrs[];
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 56196c9..1a7c80a 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -39,6 +39,84 @@
 	struct scatterlist sg_resp[ZFCP_GPN_FT_BUFFERS];
 };
 
+struct zfcp_fc_ns_handler_data {
+	struct completion done;
+	void (*handler)(unsigned long);
+	unsigned long handler_data;
+};
+
+static int zfcp_wka_port_get(struct zfcp_wka_port *wka_port)
+{
+	if (mutex_lock_interruptible(&wka_port->mutex))
+		return -ERESTARTSYS;
+
+	if (wka_port->status != ZFCP_WKA_PORT_ONLINE) {
+		wka_port->status = ZFCP_WKA_PORT_OPENING;
+		if (zfcp_fsf_open_wka_port(wka_port))
+			wka_port->status = ZFCP_WKA_PORT_OFFLINE;
+	}
+
+	mutex_unlock(&wka_port->mutex);
+
+	wait_event_timeout(
+		wka_port->completion_wq,
+		wka_port->status == ZFCP_WKA_PORT_ONLINE ||
+		wka_port->status == ZFCP_WKA_PORT_OFFLINE,
+		HZ >> 1);
+
+	if (wka_port->status == ZFCP_WKA_PORT_ONLINE) {
+		atomic_inc(&wka_port->refcount);
+		return 0;
+	}
+	return -EIO;
+}
+
+static void zfcp_wka_port_offline(struct work_struct *work)
+{
+	struct delayed_work *dw = container_of(work, struct delayed_work, work);
+	struct zfcp_wka_port *wka_port =
+			container_of(dw, struct zfcp_wka_port, work);
+
+	wait_event(wka_port->completion_wq,
+			atomic_read(&wka_port->refcount) == 0);
+
+	mutex_lock(&wka_port->mutex);
+	if ((atomic_read(&wka_port->refcount) != 0) ||
+	    (wka_port->status != ZFCP_WKA_PORT_ONLINE))
+		goto out;
+
+	wka_port->status = ZFCP_WKA_PORT_CLOSING;
+	if (zfcp_fsf_close_wka_port(wka_port)) {
+		wka_port->status = ZFCP_WKA_PORT_OFFLINE;
+		wake_up(&wka_port->completion_wq);
+	}
+out:
+	mutex_unlock(&wka_port->mutex);
+}
+
+static void zfcp_wka_port_put(struct zfcp_wka_port *wka_port)
+{
+	if (atomic_dec_return(&wka_port->refcount) != 0)
+		return;
+	/* wait 10 miliseconds, other reqs might pop in */
+	schedule_delayed_work(&wka_port->work, HZ / 100);
+}
+
+void zfcp_fc_nameserver_init(struct zfcp_adapter *adapter)
+{
+	struct zfcp_wka_port *wka_port = &adapter->nsp;
+
+	init_waitqueue_head(&wka_port->completion_wq);
+
+	wka_port->adapter = adapter;
+	wka_port->d_id = ZFCP_DID_DIRECTORY_SERVICE;
+
+	wka_port->status = ZFCP_WKA_PORT_OFFLINE;
+	atomic_set(&wka_port->refcount, 0);
+	mutex_init(&wka_port->mutex);
+	INIT_DELAYED_WORK(&wka_port->work, zfcp_wka_port_offline);
+}
+
 static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range,
 				   struct fcp_rscn_element *elem)
 {
@@ -47,10 +125,8 @@
 
 	read_lock_irqsave(&zfcp_data.config_lock, flags);
 	list_for_each_entry(port, &fsf_req->adapter->port_list_head, list) {
-		if (atomic_test_mask(ZFCP_STATUS_PORT_WKA, &port->status))
-			continue;
 		/* FIXME: ZFCP_STATUS_PORT_DID_DID check is racy */
-		if (!atomic_test_mask(ZFCP_STATUS_PORT_DID_DID, &port->status))
+		if (!(atomic_read(&port->status) & ZFCP_STATUS_PORT_DID_DID))
 			/* Try to connect to unused ports anyway. */
 			zfcp_erp_port_reopen(port,
 					     ZFCP_STATUS_COMMON_ERP_FAILED,
@@ -102,7 +178,7 @@
 	schedule_work(&fsf_req->adapter->scan_work);
 }
 
-static void zfcp_fc_incoming_wwpn(struct zfcp_fsf_req *req, wwn_t wwpn)
+static void zfcp_fc_incoming_wwpn(struct zfcp_fsf_req *req, u64 wwpn)
 {
 	struct zfcp_adapter *adapter = req->adapter;
 	struct zfcp_port *port;
@@ -157,7 +233,18 @@
 		zfcp_fc_incoming_rscn(fsf_req);
 }
 
-static void zfcp_ns_gid_pn_handler(unsigned long data)
+static void zfcp_fc_ns_handler(unsigned long data)
+{
+	struct zfcp_fc_ns_handler_data *compl_rec =
+			(struct zfcp_fc_ns_handler_data *) data;
+
+	if (compl_rec->handler)
+		compl_rec->handler(compl_rec->handler_data);
+
+	complete(&compl_rec->done);
+}
+
+static void zfcp_fc_ns_gid_pn_eval(unsigned long data)
 {
 	struct zfcp_gid_pn_data *gid_pn = (struct zfcp_gid_pn_data *) data;
 	struct zfcp_send_ct *ct = &gid_pn->ct;
@@ -166,43 +253,31 @@
 	struct zfcp_port *port = gid_pn->port;
 
 	if (ct->status)
-		goto out;
+		return;
 	if (ct_iu_resp->header.cmd_rsp_code != ZFCP_CT_ACCEPT) {
 		atomic_set_mask(ZFCP_STATUS_PORT_INVALID_WWPN, &port->status);
-		goto out;
+		return;
 	}
 	/* paranoia */
 	if (ct_iu_req->wwpn != port->wwpn)
-		goto out;
+		return;
 	/* looks like a valid d_id */
 	port->d_id = ct_iu_resp->d_id & ZFCP_DID_MASK;
 	atomic_set_mask(ZFCP_STATUS_PORT_DID_DID, &port->status);
-out:
-	mempool_free(gid_pn, port->adapter->pool.data_gid_pn);
 }
 
-/**
- * zfcp_fc_ns_gid_pn_request - initiate GID_PN nameserver request
- * @erp_action: pointer to zfcp_erp_action where GID_PN request is needed
- * return: -ENOMEM on error, 0 otherwise
- */
-int zfcp_fc_ns_gid_pn_request(struct zfcp_erp_action *erp_action)
+int static zfcp_fc_ns_gid_pn_request(struct zfcp_erp_action *erp_action,
+				     struct zfcp_gid_pn_data *gid_pn)
 {
-	int ret;
-	struct zfcp_gid_pn_data *gid_pn;
 	struct zfcp_adapter *adapter = erp_action->adapter;
-
-	gid_pn = mempool_alloc(adapter->pool.data_gid_pn, GFP_ATOMIC);
-	if (!gid_pn)
-		return -ENOMEM;
-
-	memset(gid_pn, 0, sizeof(*gid_pn));
+	struct zfcp_fc_ns_handler_data compl_rec;
+	int ret;
 
 	/* setup parameters for send generic command */
 	gid_pn->port = erp_action->port;
-	gid_pn->ct.port = adapter->nameserver_port;
-	gid_pn->ct.handler = zfcp_ns_gid_pn_handler;
-	gid_pn->ct.handler_data = (unsigned long) gid_pn;
+	gid_pn->ct.wka_port = &adapter->nsp;
+	gid_pn->ct.handler = zfcp_fc_ns_handler;
+	gid_pn->ct.handler_data = (unsigned long) &compl_rec;
 	gid_pn->ct.timeout = ZFCP_NS_GID_PN_TIMEOUT;
 	gid_pn->ct.req = &gid_pn->req;
 	gid_pn->ct.resp = &gid_pn->resp;
@@ -222,10 +297,42 @@
 	gid_pn->ct_iu_req.header.max_res_size = ZFCP_CT_MAX_SIZE;
 	gid_pn->ct_iu_req.wwpn = erp_action->port->wwpn;
 
+	init_completion(&compl_rec.done);
+	compl_rec.handler = zfcp_fc_ns_gid_pn_eval;
+	compl_rec.handler_data = (unsigned long) gid_pn;
 	ret = zfcp_fsf_send_ct(&gid_pn->ct, adapter->pool.fsf_req_erp,
 			       erp_action);
+	if (!ret)
+		wait_for_completion(&compl_rec.done);
+	return ret;
+}
+
+/**
+ * zfcp_fc_ns_gid_pn_request - initiate GID_PN nameserver request
+ * @erp_action: pointer to zfcp_erp_action where GID_PN request is needed
+ * return: -ENOMEM on error, 0 otherwise
+ */
+int zfcp_fc_ns_gid_pn(struct zfcp_erp_action *erp_action)
+{
+	int ret;
+	struct zfcp_gid_pn_data *gid_pn;
+	struct zfcp_adapter *adapter = erp_action->adapter;
+
+	gid_pn = mempool_alloc(adapter->pool.data_gid_pn, GFP_ATOMIC);
+	if (!gid_pn)
+		return -ENOMEM;
+
+	memset(gid_pn, 0, sizeof(*gid_pn));
+
+	ret = zfcp_wka_port_get(&adapter->nsp);
 	if (ret)
-		mempool_free(gid_pn, adapter->pool.data_gid_pn);
+		goto out;
+
+	ret = zfcp_fc_ns_gid_pn_request(erp_action, gid_pn);
+
+	zfcp_wka_port_put(&adapter->nsp);
+out:
+	mempool_free(gid_pn, adapter->pool.data_gid_pn);
 	return ret;
 }
 
@@ -255,14 +362,14 @@
 	struct scatterlist req;
 	struct scatterlist resp;
 	struct zfcp_ls_adisc ls_adisc;
-	struct zfcp_ls_adisc_acc ls_adisc_acc;
+	struct zfcp_ls_adisc ls_adisc_acc;
 };
 
 static void zfcp_fc_adisc_handler(unsigned long data)
 {
 	struct zfcp_els_adisc *adisc = (struct zfcp_els_adisc *) data;
 	struct zfcp_port *port = adisc->els.port;
-	struct zfcp_ls_adisc_acc *ls_adisc = &adisc->ls_adisc_acc;
+	struct zfcp_ls_adisc *ls_adisc = &adisc->ls_adisc_acc;
 
 	if (adisc->els.status) {
 		/* request rejected or timed out */
@@ -295,7 +402,7 @@
 	sg_init_one(adisc->els.req, &adisc->ls_adisc,
 		    sizeof(struct zfcp_ls_adisc));
 	sg_init_one(adisc->els.resp, &adisc->ls_adisc_acc,
-		    sizeof(struct zfcp_ls_adisc_acc));
+		    sizeof(struct zfcp_ls_adisc));
 
 	adisc->els.req_count = 1;
 	adisc->els.resp_count = 1;
@@ -338,30 +445,6 @@
 		zfcp_erp_port_forced_reopen(port, 0, 65, NULL);
 }
 
-static int zfcp_scan_get_nameserver(struct zfcp_adapter *adapter)
-{
-	int ret;
-
-	if (!adapter->nameserver_port)
-		return -EINTR;
-
-	if (!atomic_test_mask(ZFCP_STATUS_COMMON_UNBLOCKED,
-			       &adapter->nameserver_port->status)) {
-		ret = zfcp_erp_port_reopen(adapter->nameserver_port, 0, 148,
-					   NULL);
-		if (ret)
-			return ret;
-		zfcp_erp_wait(adapter);
-	}
-	return !atomic_test_mask(ZFCP_STATUS_COMMON_UNBLOCKED,
-				  &adapter->nameserver_port->status);
-}
-
-static void zfcp_gpn_ft_handler(unsigned long _done)
-{
-	complete((struct completion *)_done);
-}
-
 static void zfcp_free_sg_env(struct zfcp_gpn_ft *gpn_ft)
 {
 	struct scatterlist *sg = &gpn_ft->sg_req;
@@ -403,7 +486,7 @@
 {
 	struct zfcp_send_ct *ct = &gpn_ft->ct;
 	struct ct_iu_gpn_ft_req *req = sg_virt(&gpn_ft->sg_req);
-	struct completion done;
+	struct zfcp_fc_ns_handler_data compl_rec;
 	int ret;
 
 	/* prepare CT IU for GPN_FT */
@@ -420,19 +503,20 @@
 	req->fc4_type = ZFCP_CT_SCSI_FCP;
 
 	/* prepare zfcp_send_ct */
-	ct->port = adapter->nameserver_port;
-	ct->handler = zfcp_gpn_ft_handler;
-	ct->handler_data = (unsigned long)&done;
+	ct->wka_port = &adapter->nsp;
+	ct->handler = zfcp_fc_ns_handler;
+	ct->handler_data = (unsigned long)&compl_rec;
 	ct->timeout = 10;
 	ct->req = &gpn_ft->sg_req;
 	ct->resp = gpn_ft->sg_resp;
 	ct->req_count = 1;
 	ct->resp_count = ZFCP_GPN_FT_BUFFERS;
 
-	init_completion(&done);
+	init_completion(&compl_rec.done);
+	compl_rec.handler = NULL;
 	ret = zfcp_fsf_send_ct(ct, NULL, NULL);
 	if (!ret)
-		wait_for_completion(&done);
+		wait_for_completion(&compl_rec.done);
 	return ret;
 }
 
@@ -442,9 +526,8 @@
 
 	atomic_clear_mask(ZFCP_STATUS_COMMON_NOESC, &port->status);
 
-	if (port == adapter->nameserver_port)
-		return;
-	if ((port->supported_classes != 0) || (port->units != 0)) {
+	if ((port->supported_classes != 0) ||
+	    !list_empty(&port->unit_list_head)) {
 		zfcp_port_put(port);
 		return;
 	}
@@ -460,7 +543,7 @@
 	struct scatterlist *sg = gpn_ft->sg_resp;
 	struct ct_hdr *hdr = sg_virt(sg);
 	struct gpn_ft_resp_acc *acc = sg_virt(sg);
-	struct zfcp_adapter *adapter = ct->port->adapter;
+	struct zfcp_adapter *adapter = ct->wka_port->adapter;
 	struct zfcp_port *port, *tmp;
 	u32 d_id;
 	int ret = 0, x, last = 0;
@@ -490,6 +573,9 @@
 		d_id = acc->port_id[0] << 16 | acc->port_id[1] << 8 |
 		       acc->port_id[2];
 
+		/* don't attach ports with a well known address */
+		if ((d_id & ZFCP_DID_WKA) == ZFCP_DID_WKA)
+			continue;
 		/* skip the adapter's port and known remote ports */
 		if (acc->wwpn == fc_host_port_name(adapter->scsi_host))
 			continue;
@@ -528,13 +614,15 @@
 	if (fc_host_port_type(adapter->scsi_host) != FC_PORTTYPE_NPORT)
 		return 0;
 
-	ret = zfcp_scan_get_nameserver(adapter);
+	ret = zfcp_wka_port_get(&adapter->nsp);
 	if (ret)
 		return ret;
 
 	gpn_ft = zfcp_alloc_sg_env();
-	if (!gpn_ft)
-		return -ENOMEM;
+	if (!gpn_ft) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	for (i = 0; i < 3; i++) {
 		ret = zfcp_scan_issue_gpn_ft(gpn_ft, adapter);
@@ -547,7 +635,8 @@
 		}
 	}
 	zfcp_free_sg_env(gpn_ft);
-
+out:
+	zfcp_wka_port_put(&adapter->nsp);
 	return ret;
 }
 
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 49dbeb7..739356a 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -50,19 +50,16 @@
 	[FSF_QTCB_UPLOAD_CONTROL_FILE] =  FSF_SUPPORT_COMMAND
 };
 
-static const char *zfcp_act_subtable_type[] = {
-	"unknown", "OS", "WWPN", "DID", "LUN"
-};
-
 static void zfcp_act_eval_err(struct zfcp_adapter *adapter, u32 table)
 {
 	u16 subtable = table >> 16;
 	u16 rule = table & 0xffff;
+	const char *act_type[] = { "unknown", "OS", "WWPN", "DID", "LUN" };
 
-	if (subtable && subtable < ARRAY_SIZE(zfcp_act_subtable_type))
+	if (subtable && subtable < ARRAY_SIZE(act_type))
 		dev_warn(&adapter->ccw_device->dev,
-			 "Access denied in subtable %s, rule %d.\n",
-			 zfcp_act_subtable_type[subtable], rule);
+			 "Access denied according to ACT rule type %s, "
+			 "rule %d\n", act_type[subtable], rule);
 }
 
 static void zfcp_fsf_access_denied_port(struct zfcp_fsf_req *req,
@@ -70,8 +67,8 @@
 {
 	struct fsf_qtcb_header *header = &req->qtcb->header;
 	dev_warn(&req->adapter->ccw_device->dev,
-		 "Access denied, cannot send command to port 0x%016Lx.\n",
-		 port->wwpn);
+		 "Access denied to port 0x%016Lx\n",
+		 (unsigned long long)port->wwpn);
 	zfcp_act_eval_err(req->adapter, header->fsf_status_qual.halfword[0]);
 	zfcp_act_eval_err(req->adapter, header->fsf_status_qual.halfword[1]);
 	zfcp_erp_port_access_denied(port, 55, req);
@@ -83,8 +80,9 @@
 {
 	struct fsf_qtcb_header *header = &req->qtcb->header;
 	dev_warn(&req->adapter->ccw_device->dev,
-		 "Access denied for unit 0x%016Lx on port 0x%016Lx.\n",
-		 unit->fcp_lun, unit->port->wwpn);
+		 "Access denied to unit 0x%016Lx on port 0x%016Lx\n",
+		 (unsigned long long)unit->fcp_lun,
+		 (unsigned long long)unit->port->wwpn);
 	zfcp_act_eval_err(req->adapter, header->fsf_status_qual.halfword[0]);
 	zfcp_act_eval_err(req->adapter, header->fsf_status_qual.halfword[1]);
 	zfcp_erp_unit_access_denied(unit, 59, req);
@@ -93,9 +91,8 @@
 
 static void zfcp_fsf_class_not_supp(struct zfcp_fsf_req *req)
 {
-	dev_err(&req->adapter->ccw_device->dev,
-		"Required FC class not supported by adapter, "
-		"shutting down adapter.\n");
+	dev_err(&req->adapter->ccw_device->dev, "FCP device not "
+		"operational because of an unsupported FC class\n");
 	zfcp_erp_adapter_shutdown(req->adapter, 0, 123, req);
 	req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 }
@@ -171,42 +168,6 @@
 	read_unlock_irqrestore(&zfcp_data.config_lock, flags);
 }
 
-static void zfcp_fsf_bit_error_threshold(struct zfcp_fsf_req *req)
-{
-	struct zfcp_adapter *adapter = req->adapter;
-	struct fsf_status_read_buffer *sr_buf = req->data;
-	struct fsf_bit_error_payload *err = &sr_buf->payload.bit_error;
-
-	dev_warn(&adapter->ccw_device->dev,
-		 "Warning: bit error threshold data "
-		 "received for the adapter: "
-		 "link failures = %i, loss of sync errors = %i, "
-		 "loss of signal errors = %i, "
-		 "primitive sequence errors = %i, "
-		 "invalid transmission word errors = %i, "
-		 "CRC errors = %i).\n",
-		 err->link_failure_error_count,
-		 err->loss_of_sync_error_count,
-		 err->loss_of_signal_error_count,
-		 err->primitive_sequence_error_count,
-		 err->invalid_transmission_word_error_count,
-		 err->crc_error_count);
-	dev_warn(&adapter->ccw_device->dev,
-		 "Additional bit error threshold data of the adapter: "
-		 "primitive sequence event time-outs = %i, "
-		 "elastic buffer overrun errors = %i, "
-		 "advertised receive buffer-to-buffer credit = %i, "
-		 "current receice buffer-to-buffer credit = %i, "
-		 "advertised transmit buffer-to-buffer credit = %i, "
-		 "current transmit buffer-to-buffer credit = %i).\n",
-		 err->primitive_sequence_event_timeout_count,
-		 err->elastic_buffer_overrun_error_count,
-		 err->advertised_receive_b2b_credit,
-		 err->current_receive_b2b_credit,
-		 err->advertised_transmit_b2b_credit,
-		 err->current_transmit_b2b_credit);
-}
-
 static void zfcp_fsf_link_down_info_eval(struct zfcp_fsf_req *req, u8 id,
 					 struct fsf_link_down_info *link_down)
 {
@@ -223,62 +184,66 @@
 	switch (link_down->error_code) {
 	case FSF_PSQ_LINK_NO_LIGHT:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: no light detected.\n");
+			 "There is no light signal from the local "
+			 "fibre channel cable\n");
 		break;
 	case FSF_PSQ_LINK_WRAP_PLUG:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: wrap plug detected.\n");
+			 "There is a wrap plug instead of a fibre "
+			 "channel cable\n");
 		break;
 	case FSF_PSQ_LINK_NO_FCP:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: "
-			 "adjacent node on link does not support FCP.\n");
+			 "The adjacent fibre channel node does not "
+			 "support FCP\n");
 		break;
 	case FSF_PSQ_LINK_FIRMWARE_UPDATE:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: "
-			 "firmware update in progress.\n");
+			 "The FCP device is suspended because of a "
+			 "firmware update\n");
 		break;
 	case FSF_PSQ_LINK_INVALID_WWPN:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: "
-			 "duplicate or invalid WWPN detected.\n");
+			 "The FCP device detected a WWPN that is "
+			 "duplicate or not valid\n");
 		break;
 	case FSF_PSQ_LINK_NO_NPIV_SUPPORT:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: "
-			 "no support for NPIV by Fabric.\n");
+			 "The fibre channel fabric does not support NPIV\n");
 		break;
 	case FSF_PSQ_LINK_NO_FCP_RESOURCES:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: "
-			 "out of resource in FCP daughtercard.\n");
+			 "The FCP adapter cannot support more NPIV ports\n");
 		break;
 	case FSF_PSQ_LINK_NO_FABRIC_RESOURCES:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: "
-			 "out of resource in Fabric.\n");
+			 "The adjacent switch cannot support "
+			 "more NPIV ports\n");
 		break;
 	case FSF_PSQ_LINK_FABRIC_LOGIN_UNABLE:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link is down: "
-			 "unable to login to Fabric.\n");
+			 "The FCP adapter could not log in to the "
+			 "fibre channel fabric\n");
 		break;
 	case FSF_PSQ_LINK_WWPN_ASSIGNMENT_CORRUPTED:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "WWPN assignment file corrupted on adapter.\n");
+			 "The WWPN assignment file on the FCP adapter "
+			 "has been damaged\n");
 		break;
 	case FSF_PSQ_LINK_MODE_TABLE_CURRUPTED:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "Mode table corrupted on adapter.\n");
+			 "The mode table on the FCP adapter "
+			 "has been damaged\n");
 		break;
 	case FSF_PSQ_LINK_NO_WWPN_ASSIGNMENT:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "No WWPN for assignment table on adapter.\n");
+			 "All NPIV ports on the FCP adapter have "
+			 "been assigned\n");
 		break;
 	default:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The local link to adapter is down.\n");
+			 "The link between the FCP adapter and "
+			 "the FC fabric is down\n");
 	}
 out:
 	zfcp_erp_adapter_failed(adapter, id, req);
@@ -286,27 +251,18 @@
 
 static void zfcp_fsf_status_read_link_down(struct zfcp_fsf_req *req)
 {
-	struct zfcp_adapter *adapter = req->adapter;
 	struct fsf_status_read_buffer *sr_buf = req->data;
 	struct fsf_link_down_info *ldi =
 		(struct fsf_link_down_info *) &sr_buf->payload;
 
 	switch (sr_buf->status_subtype) {
 	case FSF_STATUS_READ_SUB_NO_PHYSICAL_LINK:
-		dev_warn(&adapter->ccw_device->dev,
-			 "Physical link is down.\n");
 		zfcp_fsf_link_down_info_eval(req, 38, ldi);
 		break;
 	case FSF_STATUS_READ_SUB_FDISC_FAILED:
-		dev_warn(&adapter->ccw_device->dev,
-			 "Local link is down "
-			 "due to failed FDISC login.\n");
 		zfcp_fsf_link_down_info_eval(req, 39, ldi);
 		break;
 	case FSF_STATUS_READ_SUB_FIRMWARE_UPDATE:
-		dev_warn(&adapter->ccw_device->dev,
-			 "Local link is down "
-			 "due to firmware update on adapter.\n");
 		zfcp_fsf_link_down_info_eval(req, 40, NULL);
 	};
 }
@@ -335,14 +291,17 @@
 	case FSF_STATUS_READ_SENSE_DATA_AVAIL:
 		break;
 	case FSF_STATUS_READ_BIT_ERROR_THRESHOLD:
-		zfcp_fsf_bit_error_threshold(req);
+		dev_warn(&adapter->ccw_device->dev,
+			 "The error threshold for checksum statistics "
+			 "has been exceeded\n");
+		zfcp_hba_dbf_event_berr(adapter, req);
 		break;
 	case FSF_STATUS_READ_LINK_DOWN:
 		zfcp_fsf_status_read_link_down(req);
 		break;
 	case FSF_STATUS_READ_LINK_UP:
 		dev_info(&adapter->ccw_device->dev,
-			 "Local link was replugged.\n");
+			 "The local link has been restored\n");
 		/* All ports should be marked as ready to run again */
 		zfcp_erp_modify_adapter_status(adapter, 30, NULL,
 					       ZFCP_STATUS_COMMON_RUNNING,
@@ -370,7 +329,7 @@
 	zfcp_fsf_req_free(req);
 
 	atomic_inc(&adapter->stat_miss);
-	schedule_work(&adapter->stat_work);
+	queue_work(zfcp_data.work_queue, &adapter->stat_work);
 }
 
 static void zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *req)
@@ -386,8 +345,8 @@
 		break;
 	case FSF_SQ_NO_RECOM:
 		dev_err(&req->adapter->ccw_device->dev,
-			"No recommendation could be given for a "
-			"problem on the adapter.\n");
+			"The FCP adapter reported a problem "
+			"that cannot be recovered\n");
 		zfcp_erp_adapter_shutdown(req->adapter, 0, 121, req);
 		break;
 	}
@@ -403,8 +362,7 @@
 	switch (req->qtcb->header.fsf_status) {
 	case FSF_UNKNOWN_COMMAND:
 		dev_err(&req->adapter->ccw_device->dev,
-			"Command issued by the device driver (0x%x) is "
-			"not known by the adapter.\n",
+			"The FCP adapter does not recognize the command 0x%x\n",
 			req->qtcb->header.fsf_command);
 		zfcp_erp_adapter_shutdown(req->adapter, 0, 120, req);
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
@@ -435,11 +393,9 @@
 		return;
 	case FSF_PROT_QTCB_VERSION_ERROR:
 		dev_err(&adapter->ccw_device->dev,
-			"The QTCB version requested by zfcp (0x%x) is not "
-			"supported by the FCP adapter (lowest supported "
-			"0x%x, highest supported 0x%x).\n",
-			FSF_QTCB_CURRENT_VERSION, psq->word[0],
-			psq->word[1]);
+			"QTCB version 0x%x not supported by FCP adapter "
+			"(0x%x to 0x%x)\n", FSF_QTCB_CURRENT_VERSION,
+			psq->word[0], psq->word[1]);
 		zfcp_erp_adapter_shutdown(adapter, 0, 117, req);
 		break;
 	case FSF_PROT_ERROR_STATE:
@@ -449,8 +405,7 @@
 		break;
 	case FSF_PROT_UNSUPP_QTCB_TYPE:
 		dev_err(&adapter->ccw_device->dev,
-			"Packet header type used by the device driver is "
-			"incompatible with that used on the adapter.\n");
+			"The QTCB type is not supported by the FCP adapter\n");
 		zfcp_erp_adapter_shutdown(adapter, 0, 118, req);
 		break;
 	case FSF_PROT_HOST_CONNECTION_INITIALIZING:
@@ -459,7 +414,7 @@
 		break;
 	case FSF_PROT_DUPLICATE_REQUEST_ID:
 		dev_err(&adapter->ccw_device->dev,
-			"The request identifier 0x%Lx is ambiguous.\n",
+			"0x%Lx is an ambiguous request identifier\n",
 			(unsigned long long)qtcb->bottom.support.req_handle);
 		zfcp_erp_adapter_shutdown(adapter, 0, 78, req);
 		break;
@@ -479,9 +434,7 @@
 		break;
 	default:
 		dev_err(&adapter->ccw_device->dev,
-			"Transfer protocol status information"
-			"provided by the adapter (0x%x) "
-			"is not compatible with the device driver.\n",
+			"0x%x is not a valid transfer protocol status\n",
 			qtcb->prefix.prot_status);
 		zfcp_erp_adapter_shutdown(adapter, 0, 119, req);
 	}
@@ -559,33 +512,17 @@
 		adapter->peer_wwpn = bottom->plogi_payload.wwpn;
 		adapter->peer_wwnn = bottom->plogi_payload.wwnn;
 		fc_host_port_type(shost) = FC_PORTTYPE_PTP;
-		if (req->erp_action)
-			dev_info(&adapter->ccw_device->dev,
-				 "Point-to-Point fibrechannel "
-				 "configuration detected.\n");
 		break;
 	case FSF_TOPO_FABRIC:
 		fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
-		if (req->erp_action)
-			dev_info(&adapter->ccw_device->dev,
-				 "Switched fabric fibrechannel "
-				 "network detected.\n");
 		break;
 	case FSF_TOPO_AL:
 		fc_host_port_type(shost) = FC_PORTTYPE_NLPORT;
-		dev_err(&adapter->ccw_device->dev,
-			"Unsupported arbitrated loop fibrechannel "
-			"topology detected, shutting down "
-			"adapter.\n");
-		zfcp_erp_adapter_shutdown(adapter, 0, 127, req);
-		return -EIO;
 	default:
-		fc_host_port_type(shost) = FC_PORTTYPE_UNKNOWN;
 		dev_err(&adapter->ccw_device->dev,
-			"The fibrechannel topology reported by the"
-			" adapter is not known by the zfcp driver,"
-			" shutting down adapter.\n");
-		zfcp_erp_adapter_shutdown(adapter, 0, 128, req);
+			"Unknown or unsupported arbitrated loop "
+			"fibre channel topology detected\n");
+		zfcp_erp_adapter_shutdown(adapter, 0, 127, req);
 		return -EIO;
 	}
 
@@ -616,11 +553,9 @@
 
 		if (bottom->max_qtcb_size < sizeof(struct fsf_qtcb)) {
 			dev_err(&adapter->ccw_device->dev,
-				"Maximum QTCB size (%d bytes) allowed by "
-				"the adapter is lower than the minimum "
-				"required by the driver (%ld bytes).\n",
-				bottom->max_qtcb_size,
-				sizeof(struct fsf_qtcb));
+				"FCP adapter maximum QTCB size (%d bytes) "
+				"is too small\n",
+				bottom->max_qtcb_size);
 			zfcp_erp_adapter_shutdown(adapter, 0, 129, req);
 			return;
 		}
@@ -656,15 +591,15 @@
 
 	if (FSF_QTCB_CURRENT_VERSION < bottom->low_qtcb_version) {
 		dev_err(&adapter->ccw_device->dev,
-			"The adapter only supports newer control block "
-			"versions, try updated device driver.\n");
+			"The FCP adapter only supports newer "
+			"control block versions\n");
 		zfcp_erp_adapter_shutdown(adapter, 0, 125, req);
 		return;
 	}
 	if (FSF_QTCB_CURRENT_VERSION > bottom->high_qtcb_version) {
 		dev_err(&adapter->ccw_device->dev,
-			"The adapter only supports older control block "
-			"versions, consider a microcode upgrade.\n");
+			"The FCP adapter only supports older "
+			"control block versions\n");
 		zfcp_erp_adapter_shutdown(adapter, 0, 126, req);
 	}
 }
@@ -688,7 +623,6 @@
 
 static void zfcp_fsf_exchange_port_data_handler(struct zfcp_fsf_req *req)
 {
-	struct zfcp_adapter *adapter = req->adapter;
 	struct fsf_qtcb *qtcb = req->qtcb;
 
 	if (req->status & ZFCP_STATUS_FSFREQ_ERROR)
@@ -697,38 +631,47 @@
 	switch (qtcb->header.fsf_status) {
 	case FSF_GOOD:
 		zfcp_fsf_exchange_port_evaluate(req);
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
 		break;
 	case FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE:
 		zfcp_fsf_exchange_port_evaluate(req);
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
 		zfcp_fsf_link_down_info_eval(req, 43,
 			&qtcb->header.fsf_status_qual.link_down_info);
 		break;
 	}
 }
 
-static int zfcp_fsf_sbal_check(struct zfcp_qdio_queue *queue)
+static int zfcp_fsf_sbal_check(struct zfcp_adapter *adapter)
 {
-	spin_lock_bh(&queue->lock);
-	if (atomic_read(&queue->count))
+	struct zfcp_qdio_queue *req_q = &adapter->req_q;
+
+	spin_lock_bh(&adapter->req_q_lock);
+	if (atomic_read(&req_q->count))
 		return 1;
-	spin_unlock_bh(&queue->lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return 0;
 }
 
+static int zfcp_fsf_sbal_available(struct zfcp_adapter *adapter)
+{
+	unsigned int count = atomic_read(&adapter->req_q.count);
+	if (!count)
+		atomic_inc(&adapter->qdio_outb_full);
+	return count > 0;
+}
+
 static int zfcp_fsf_req_sbal_get(struct zfcp_adapter *adapter)
 {
 	long ret;
-	struct zfcp_qdio_queue *req_q = &adapter->req_q;
 
-	spin_unlock_bh(&req_q->lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	ret = wait_event_interruptible_timeout(adapter->request_wq,
-					zfcp_fsf_sbal_check(req_q), 5 * HZ);
+					zfcp_fsf_sbal_check(adapter), 5 * HZ);
 	if (ret > 0)
 		return 0;
+	if (!ret)
+		atomic_inc(&adapter->qdio_outb_full);
 
-	spin_lock_bh(&req_q->lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	return -EIO;
 }
 
@@ -765,7 +708,7 @@
 						u32 fsf_cmd, int req_flags,
 						mempool_t *pool)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 
 	struct zfcp_fsf_req *req;
 	struct zfcp_qdio_queue *req_q = &adapter->req_q;
@@ -867,10 +810,10 @@
 {
 	struct zfcp_fsf_req *req;
 	struct fsf_status_read_buffer *sr_buf;
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	int retval = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 
@@ -910,7 +853,7 @@
 	zfcp_fsf_req_free(req);
 	zfcp_hba_dbf_event_fsf_unsol("fail", adapter, NULL);
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return retval;
 }
 
@@ -980,11 +923,11 @@
 						struct zfcp_unit *unit,
 						int req_flags)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *req = NULL;
 
-	spin_lock(&adapter->req_q.lock);
-	if (!atomic_read(&adapter->req_q.count))
+	spin_lock(&adapter->req_q_lock);
+	if (!zfcp_fsf_sbal_available(adapter))
 		goto out;
 	req = zfcp_fsf_req_create(adapter, FSF_QTCB_ABORT_FCP_CMND,
 				  req_flags, adapter->pool.fsf_req_abort);
@@ -1013,7 +956,7 @@
 	zfcp_fsf_req_free(req);
 	req = NULL;
 out:
-	spin_unlock(&adapter->req_q.lock);
+	spin_unlock(&adapter->req_q_lock);
 	return req;
 }
 
@@ -1021,7 +964,6 @@
 {
 	struct zfcp_adapter *adapter = req->adapter;
 	struct zfcp_send_ct *send_ct = req->data;
-	struct zfcp_port *port = send_ct->port;
 	struct fsf_qtcb_header *header = &req->qtcb->header;
 
 	send_ct->status = -EINVAL;
@@ -1040,17 +982,14 @@
         case FSF_ADAPTER_STATUS_AVAILABLE:
                 switch (header->fsf_status_qual.word[0]){
                 case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE:
-			zfcp_test_link(port);
                 case FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED:
 			req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 			break;
                 }
                 break;
 	case FSF_ACCESS_DENIED:
-		zfcp_fsf_access_denied_port(req, port);
 		break;
         case FSF_PORT_BOXED:
-		zfcp_erp_port_boxed(port, 49, req);
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR |
 			       ZFCP_STATUS_FSFREQ_RETRY;
 		break;
@@ -1101,12 +1040,12 @@
 int zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool,
 		     struct zfcp_erp_action *erp_action)
 {
-	struct zfcp_port *port = ct->port;
-	struct zfcp_adapter *adapter = port->adapter;
+	struct zfcp_wka_port *wka_port = ct->wka_port;
+	struct zfcp_adapter *adapter = wka_port->adapter;
 	struct zfcp_fsf_req *req;
 	int ret = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 
@@ -1123,7 +1062,7 @@
 		goto failed_send;
 
 	req->handler = zfcp_fsf_send_ct_handler;
-	req->qtcb->header.port_handle = port->handle;
+	req->qtcb->header.port_handle = wka_port->handle;
 	req->qtcb->bottom.support.service_class = FSF_CLASS_3;
 	req->qtcb->bottom.support.timeout = ct->timeout;
 	req->data = ct;
@@ -1148,7 +1087,7 @@
 	if (erp_action)
 		erp_action->fsf_req = NULL;
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return ret;
 }
 
@@ -1218,8 +1157,8 @@
 		       ZFCP_STATUS_COMMON_UNBLOCKED)))
 		return -EBUSY;
 
-	spin_lock(&adapter->req_q.lock);
-	if (!atomic_read(&adapter->req_q.count))
+	spin_lock(&adapter->req_q_lock);
+	if (!zfcp_fsf_sbal_available(adapter))
 		goto out;
 	req = zfcp_fsf_req_create(adapter, FSF_QTCB_SEND_ELS,
 				  ZFCP_REQ_AUTO_CLEANUP, NULL);
@@ -1228,8 +1167,8 @@
 		goto out;
 	}
 
-	ret = zfcp_fsf_setup_sbals(req, els->req, els->resp,
-				   FSF_MAX_SBALS_PER_ELS_REQ);
+	ret = zfcp_fsf_setup_sbals(req, els->req, els->resp, 2);
+
 	if (ret)
 		goto failed_send;
 
@@ -1252,19 +1191,19 @@
 failed_send:
 	zfcp_fsf_req_free(req);
 out:
-	spin_unlock(&adapter->req_q.lock);
+	spin_unlock(&adapter->req_q_lock);
 	return ret;
 }
 
 int zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *req;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	int retval = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
-	if (!atomic_read(&adapter->req_q.count))
+	spin_lock_bh(&adapter->req_q_lock);
+	if (!zfcp_fsf_sbal_available(adapter))
 		goto out;
 	req = zfcp_fsf_req_create(adapter,
 				  FSF_QTCB_EXCHANGE_CONFIG_DATA,
@@ -1295,18 +1234,18 @@
 		erp_action->fsf_req = NULL;
 	}
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return retval;
 }
 
 int zfcp_fsf_exchange_config_data_sync(struct zfcp_adapter *adapter,
 				       struct fsf_qtcb_bottom_config *data)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *req = NULL;
 	int retval = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 
@@ -1334,7 +1273,7 @@
 	zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
 	retval = zfcp_fsf_req_send(req);
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	if (!retval)
 		wait_event(req->completion_wq,
 			   req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
@@ -1351,7 +1290,7 @@
  */
 int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *req;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	int retval = -EIO;
@@ -1359,8 +1298,8 @@
 	if (!(adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT))
 		return -EOPNOTSUPP;
 
-	spin_lock_bh(&adapter->req_q.lock);
-	if (!atomic_read(&adapter->req_q.count))
+	spin_lock_bh(&adapter->req_q_lock);
+	if (!zfcp_fsf_sbal_available(adapter))
 		goto out;
 	req = zfcp_fsf_req_create(adapter, FSF_QTCB_EXCHANGE_PORT_DATA,
 				  ZFCP_REQ_AUTO_CLEANUP,
@@ -1385,7 +1324,7 @@
 		erp_action->fsf_req = NULL;
 	}
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return retval;
 }
 
@@ -1398,15 +1337,15 @@
 int zfcp_fsf_exchange_port_data_sync(struct zfcp_adapter *adapter,
 				     struct fsf_qtcb_bottom_port *data)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *req = NULL;
 	int retval = -EIO;
 
 	if (!(adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT))
 		return -EOPNOTSUPP;
 
-	spin_lock_bh(&adapter->req_q.lock);
-	if (!atomic_read(&adapter->req_q.count))
+	spin_lock_bh(&adapter->req_q_lock);
+	if (!zfcp_fsf_sbal_available(adapter))
 		goto out;
 
 	req = zfcp_fsf_req_create(adapter, FSF_QTCB_EXCHANGE_PORT_DATA, 0,
@@ -1427,7 +1366,7 @@
 	zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
 	retval = zfcp_fsf_req_send(req);
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	if (!retval)
 		wait_event(req->completion_wq,
 			   req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
@@ -1443,7 +1382,7 @@
 	struct fsf_plogi *plogi;
 
 	if (req->status & ZFCP_STATUS_FSFREQ_ERROR)
-		goto skip_fsfstatus;
+		return;
 
 	switch (header->fsf_status) {
 	case FSF_PORT_ALREADY_OPEN:
@@ -1453,9 +1392,9 @@
 		break;
 	case FSF_MAXIMUM_NUMBER_OF_PORTS_EXCEEDED:
 		dev_warn(&req->adapter->ccw_device->dev,
-			 "The adapter is out of resources. The remote port "
-			 "0x%016Lx could not be opened, disabling it.\n",
-			 port->wwpn);
+			 "Not enough FCP adapter resources to open "
+			 "remote port 0x%016Lx\n",
+			 (unsigned long long)port->wwpn);
 		zfcp_erp_port_failed(port, 31, req);
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
@@ -1467,8 +1406,8 @@
 			break;
 		case FSF_SQ_NO_RETRY_POSSIBLE:
 			dev_warn(&req->adapter->ccw_device->dev,
-				 "The remote port 0x%016Lx could not be "
-				 "opened. Disabling it.\n", port->wwpn);
+				 "Remote port 0x%016Lx could not be opened\n",
+				 (unsigned long long)port->wwpn);
 			zfcp_erp_port_failed(port, 32, req);
 			req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 			break;
@@ -1496,9 +1435,6 @@
 		 * another GID_PN straight after a port has been opened.
 		 * Alternately, an ADISC/PDISC ELS should suffice, as well.
 		 */
-		if (atomic_read(&port->status) & ZFCP_STATUS_PORT_NO_WWPN)
-			break;
-
 		plogi = (struct fsf_plogi *) req->qtcb->bottom.support.els;
 		if (req->qtcb->bottom.support.els1_length >= sizeof(*plogi)) {
 			if (plogi->serv_param.wwpn != port->wwpn)
@@ -1514,9 +1450,6 @@
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	}
-
-skip_fsfstatus:
-	atomic_clear_mask(ZFCP_STATUS_COMMON_OPENING, &port->status);
 }
 
 /**
@@ -1526,12 +1459,12 @@
  */
 int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_fsf_req *req;
 	int retval = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 
@@ -1553,7 +1486,6 @@
 	req->data = erp_action->port;
 	req->erp_action = erp_action;
 	erp_action->fsf_req = req;
-	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->port->status);
 
 	zfcp_fsf_start_erp_timer(req);
 	retval = zfcp_fsf_req_send(req);
@@ -1562,7 +1494,7 @@
 		erp_action->fsf_req = NULL;
 	}
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return retval;
 }
 
@@ -1571,7 +1503,7 @@
 	struct zfcp_port *port = req->data;
 
 	if (req->status & ZFCP_STATUS_FSFREQ_ERROR)
-		goto skip_fsfstatus;
+		return;
 
 	switch (req->qtcb->header.fsf_status) {
 	case FSF_PORT_HANDLE_NOT_VALID:
@@ -1586,9 +1518,6 @@
 					    ZFCP_CLEAR);
 		break;
 	}
-
-skip_fsfstatus:
-	atomic_clear_mask(ZFCP_STATUS_COMMON_CLOSING, &port->status);
 }
 
 /**
@@ -1598,12 +1527,12 @@
  */
 int zfcp_fsf_close_port(struct zfcp_erp_action *erp_action)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_fsf_req *req;
 	int retval = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 
@@ -1624,7 +1553,6 @@
 	req->erp_action = erp_action;
 	req->qtcb->header.port_handle = erp_action->port->handle;
 	erp_action->fsf_req = req;
-	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->port->status);
 
 	zfcp_fsf_start_erp_timer(req);
 	retval = zfcp_fsf_req_send(req);
@@ -1633,7 +1561,131 @@
 		erp_action->fsf_req = NULL;
 	}
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
+	return retval;
+}
+
+static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req)
+{
+	struct zfcp_wka_port *wka_port = req->data;
+	struct fsf_qtcb_header *header = &req->qtcb->header;
+
+	if (req->status & ZFCP_STATUS_FSFREQ_ERROR) {
+		wka_port->status = ZFCP_WKA_PORT_OFFLINE;
+		goto out;
+	}
+
+	switch (header->fsf_status) {
+	case FSF_MAXIMUM_NUMBER_OF_PORTS_EXCEEDED:
+		dev_warn(&req->adapter->ccw_device->dev,
+			 "Opening WKA port 0x%x failed\n", wka_port->d_id);
+	case FSF_ADAPTER_STATUS_AVAILABLE:
+		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
+	case FSF_ACCESS_DENIED:
+		wka_port->status = ZFCP_WKA_PORT_OFFLINE;
+		break;
+	case FSF_PORT_ALREADY_OPEN:
+	case FSF_GOOD:
+		wka_port->handle = header->port_handle;
+		wka_port->status = ZFCP_WKA_PORT_ONLINE;
+	}
+out:
+	wake_up(&wka_port->completion_wq);
+}
+
+/**
+ * zfcp_fsf_open_wka_port - create and send open wka-port request
+ * @wka_port: pointer to struct zfcp_wka_port
+ * Returns: 0 on success, error otherwise
+ */
+int zfcp_fsf_open_wka_port(struct zfcp_wka_port *wka_port)
+{
+	struct qdio_buffer_element *sbale;
+	struct zfcp_adapter *adapter = wka_port->adapter;
+	struct zfcp_fsf_req *req;
+	int retval = -EIO;
+
+	spin_lock_bh(&adapter->req_q_lock);
+	if (zfcp_fsf_req_sbal_get(adapter))
+		goto out;
+
+	req = zfcp_fsf_req_create(adapter,
+				  FSF_QTCB_OPEN_PORT_WITH_DID,
+				  ZFCP_REQ_AUTO_CLEANUP,
+				  adapter->pool.fsf_req_erp);
+	if (unlikely(IS_ERR(req))) {
+		retval = PTR_ERR(req);
+		goto out;
+	}
+
+	sbale = zfcp_qdio_sbale_req(req);
+	sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
+	sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
+
+	req->handler = zfcp_fsf_open_wka_port_handler;
+	req->qtcb->bottom.support.d_id = wka_port->d_id;
+	req->data = wka_port;
+
+	zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
+	retval = zfcp_fsf_req_send(req);
+	if (retval)
+		zfcp_fsf_req_free(req);
+out:
+	spin_unlock_bh(&adapter->req_q_lock);
+	return retval;
+}
+
+static void zfcp_fsf_close_wka_port_handler(struct zfcp_fsf_req *req)
+{
+	struct zfcp_wka_port *wka_port = req->data;
+
+	if (req->qtcb->header.fsf_status == FSF_PORT_HANDLE_NOT_VALID) {
+		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
+		zfcp_erp_adapter_reopen(wka_port->adapter, 0, 84, req);
+	}
+
+	wka_port->status = ZFCP_WKA_PORT_OFFLINE;
+	wake_up(&wka_port->completion_wq);
+}
+
+/**
+ * zfcp_fsf_close_wka_port - create and send close wka port request
+ * @erp_action: pointer to struct zfcp_erp_action
+ * Returns: 0 on success, error otherwise
+ */
+int zfcp_fsf_close_wka_port(struct zfcp_wka_port *wka_port)
+{
+	struct qdio_buffer_element *sbale;
+	struct zfcp_adapter *adapter = wka_port->adapter;
+	struct zfcp_fsf_req *req;
+	int retval = -EIO;
+
+	spin_lock_bh(&adapter->req_q_lock);
+	if (zfcp_fsf_req_sbal_get(adapter))
+		goto out;
+
+	req = zfcp_fsf_req_create(adapter, FSF_QTCB_CLOSE_PORT,
+				  ZFCP_REQ_AUTO_CLEANUP,
+				  adapter->pool.fsf_req_erp);
+	if (unlikely(IS_ERR(req))) {
+		retval = PTR_ERR(req);
+		goto out;
+	}
+
+	sbale = zfcp_qdio_sbale_req(req);
+	sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
+	sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
+
+	req->handler = zfcp_fsf_close_wka_port_handler;
+	req->data = wka_port;
+	req->qtcb->header.port_handle = wka_port->handle;
+
+	zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
+	retval = zfcp_fsf_req_send(req);
+	if (retval)
+		zfcp_fsf_req_free(req);
+out:
+	spin_unlock_bh(&adapter->req_q_lock);
 	return retval;
 }
 
@@ -1695,12 +1747,12 @@
  */
 int zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_fsf_req *req;
 	int retval = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 
@@ -1731,7 +1783,7 @@
 		erp_action->fsf_req = NULL;
 	}
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return retval;
 }
 
@@ -1746,7 +1798,7 @@
 	int exclusive, readwrite;
 
 	if (req->status & ZFCP_STATUS_FSFREQ_ERROR)
-		goto skip_fsfstatus;
+		return;
 
 	atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED |
 			  ZFCP_STATUS_COMMON_ACCESS_BOXED |
@@ -1774,14 +1826,12 @@
 	case FSF_LUN_SHARING_VIOLATION:
 		if (header->fsf_status_qual.word[0])
 			dev_warn(&adapter->ccw_device->dev,
-				 "FCP-LUN 0x%Lx at the remote port "
-				 "with WWPN 0x%Lx "
-				 "connected to the adapter "
-				 "is already in use in LPAR%d, CSS%d.\n",
-				 unit->fcp_lun,
-				 unit->port->wwpn,
-				 queue_designator->hla,
-				 queue_designator->cssid);
+				 "LUN 0x%Lx on port 0x%Lx is already in "
+				 "use by CSS%d, MIF Image ID %x\n",
+				 (unsigned long long)unit->fcp_lun,
+				 (unsigned long long)unit->port->wwpn,
+				 queue_designator->cssid,
+				 queue_designator->hla);
 		else
 			zfcp_act_eval_err(adapter,
 					  header->fsf_status_qual.word[2]);
@@ -1792,9 +1842,10 @@
 		break;
 	case FSF_MAXIMUM_NUMBER_OF_LUNS_EXCEEDED:
 		dev_warn(&adapter->ccw_device->dev,
-			 "The adapter ran out of resources. There is no "
-			 "handle available for unit 0x%016Lx on port 0x%016Lx.",
-			 unit->fcp_lun, unit->port->wwpn);
+			 "No handle is available for LUN "
+			 "0x%016Lx on port 0x%016Lx\n",
+			 (unsigned long long)unit->fcp_lun,
+			 (unsigned long long)unit->port->wwpn);
 		zfcp_erp_unit_failed(unit, 34, req);
 		/* fall through */
 	case FSF_INVALID_COMMAND_OPTION:
@@ -1831,26 +1882,29 @@
                 		atomic_set_mask(ZFCP_STATUS_UNIT_READONLY,
 						&unit->status);
 				dev_info(&adapter->ccw_device->dev,
-					 "Read-only access for unit 0x%016Lx "
-					 "on port 0x%016Lx.\n",
-					 unit->fcp_lun, unit->port->wwpn);
+					 "SCSI device at LUN 0x%016Lx on port "
+					 "0x%016Lx opened read-only\n",
+					 (unsigned long long)unit->fcp_lun,
+					 (unsigned long long)unit->port->wwpn);
         		}
 
         		if (exclusive && !readwrite) {
 				dev_err(&adapter->ccw_device->dev,
-					"Exclusive access of read-only unit "
-					"0x%016Lx on port 0x%016Lx not "
-					"supported, disabling unit.\n",
-					unit->fcp_lun, unit->port->wwpn);
+					"Exclusive read-only access not "
+					"supported (unit 0x%016Lx, "
+					"port 0x%016Lx)\n",
+					(unsigned long long)unit->fcp_lun,
+					(unsigned long long)unit->port->wwpn);
 				zfcp_erp_unit_failed(unit, 35, req);
 				req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 				zfcp_erp_unit_shutdown(unit, 0, 80, req);
         		} else if (!exclusive && readwrite) {
 				dev_err(&adapter->ccw_device->dev,
-					"Shared access of read-write unit "
-					"0x%016Lx on port 0x%016Lx not "
-					"supported, disabling unit.\n",
-					unit->fcp_lun, unit->port->wwpn);
+					"Shared read-write access not "
+					"supported (unit 0x%016Lx, port "
+					"0x%016Lx\n)",
+					(unsigned long long)unit->fcp_lun,
+					(unsigned long long)unit->port->wwpn);
 				zfcp_erp_unit_failed(unit, 36, req);
 				req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 				zfcp_erp_unit_shutdown(unit, 0, 81, req);
@@ -1858,9 +1912,6 @@
 		}
 		break;
 	}
-
-skip_fsfstatus:
-	atomic_clear_mask(ZFCP_STATUS_COMMON_OPENING, &unit->status);
 }
 
 /**
@@ -1870,12 +1921,12 @@
  */
 int zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_fsf_req *req;
 	int retval = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 
@@ -1901,8 +1952,6 @@
 	if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE))
 		req->qtcb->bottom.support.option = FSF_OPEN_LUN_SUPPRESS_BOXING;
 
-	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->unit->status);
-
 	zfcp_fsf_start_erp_timer(req);
 	retval = zfcp_fsf_req_send(req);
 	if (retval) {
@@ -1910,7 +1959,7 @@
 		erp_action->fsf_req = NULL;
 	}
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return retval;
 }
 
@@ -1919,7 +1968,7 @@
 	struct zfcp_unit *unit = req->data;
 
 	if (req->status & ZFCP_STATUS_FSFREQ_ERROR)
-		goto skip_fsfstatus;
+		return;
 
 	switch (req->qtcb->header.fsf_status) {
 	case FSF_PORT_HANDLE_NOT_VALID:
@@ -1949,8 +1998,6 @@
 		atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, &unit->status);
 		break;
 	}
-skip_fsfstatus:
-	atomic_clear_mask(ZFCP_STATUS_COMMON_CLOSING, &unit->status);
 }
 
 /**
@@ -1960,12 +2007,12 @@
  */
 int zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 	struct zfcp_fsf_req *req;
 	int retval = -EIO;
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 	req = zfcp_fsf_req_create(adapter, FSF_QTCB_CLOSE_LUN,
@@ -1986,7 +2033,6 @@
 	req->data = erp_action->unit;
 	req->erp_action = erp_action;
 	erp_action->fsf_req = req;
-	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->unit->status);
 
 	zfcp_fsf_start_erp_timer(req);
 	retval = zfcp_fsf_req_send(req);
@@ -1995,7 +2041,7 @@
 		erp_action->fsf_req = NULL;
 	}
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 	return retval;
 }
 
@@ -2156,21 +2202,21 @@
 		break;
 	case FSF_DIRECTION_INDICATOR_NOT_VALID:
 		dev_err(&req->adapter->ccw_device->dev,
-			"Invalid data direction (%d) given for unit "
-			"0x%016Lx on port 0x%016Lx, shutting down "
-			"adapter.\n",
+			"Incorrect direction %d, unit 0x%016Lx on port "
+			"0x%016Lx closed\n",
 			req->qtcb->bottom.io.data_direction,
-			unit->fcp_lun, unit->port->wwpn);
+			(unsigned long long)unit->fcp_lun,
+			(unsigned long long)unit->port->wwpn);
 		zfcp_erp_adapter_shutdown(unit->port->adapter, 0, 133, req);
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	case FSF_CMND_LENGTH_NOT_VALID:
 		dev_err(&req->adapter->ccw_device->dev,
-			"An invalid control-data-block length field (%d) "
-			"was found in a command for unit 0x%016Lx on port "
-			"0x%016Lx. Shutting down adapter.\n",
+			"Incorrect CDB length %d, unit 0x%016Lx on "
+			"port 0x%016Lx closed\n",
 			req->qtcb->bottom.io.fcp_cmnd_length,
-			unit->fcp_lun, unit->port->wwpn);
+			(unsigned long long)unit->fcp_lun,
+			(unsigned long long)unit->port->wwpn);
 		zfcp_erp_adapter_shutdown(unit->port->adapter, 0, 134, req);
 		req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
@@ -2201,6 +2247,20 @@
 	}
 }
 
+static void zfcp_set_fcp_dl(struct fcp_cmnd_iu *fcp_cmd, u32 fcp_dl)
+{
+	u32 *fcp_dl_ptr;
+
+	/*
+	 * fcp_dl_addr = start address of fcp_cmnd structure +
+	 * size of fixed part + size of dynamically sized add_dcp_cdb field
+	 * SEE FCP-2 documentation
+	 */
+	fcp_dl_ptr = (u32 *) ((unsigned char *) &fcp_cmd[1] +
+			(fcp_cmd->add_fcp_cdb_length << 2));
+	*fcp_dl_ptr = fcp_dl;
+}
+
 /**
  * zfcp_fsf_send_fcp_command_task - initiate an FCP command (for a SCSI command)
  * @adapter: adapter where scsi command is issued
@@ -2223,8 +2283,8 @@
 		       ZFCP_STATUS_COMMON_UNBLOCKED)))
 		return -EBUSY;
 
-	spin_lock(&adapter->req_q.lock);
-	if (!atomic_read(&adapter->req_q.count))
+	spin_lock(&adapter->req_q_lock);
+	if (!zfcp_fsf_sbal_available(adapter))
 		goto out;
 	req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, req_flags,
 				  adapter->pool.fsf_req_scsi);
@@ -2286,7 +2346,7 @@
 	memcpy(fcp_cmnd_iu->fcp_cdb, scsi_cmnd->cmnd, scsi_cmnd->cmd_len);
 
 	req->qtcb->bottom.io.fcp_cmnd_length = sizeof(struct fcp_cmnd_iu) +
-		fcp_cmnd_iu->add_fcp_cdb_length + sizeof(fcp_dl_t);
+		fcp_cmnd_iu->add_fcp_cdb_length + sizeof(u32);
 
 	real_bytes = zfcp_qdio_sbals_from_sg(req, sbtype,
 					     scsi_sglist(scsi_cmnd),
@@ -2296,10 +2356,10 @@
 			retval = -EIO;
 		else {
 			dev_err(&adapter->ccw_device->dev,
-				"SCSI request too large. "
-				"Shutting down unit 0x%016Lx on port "
-				"0x%016Lx.\n", unit->fcp_lun,
-				unit->port->wwpn);
+				"Oversize data package, unit 0x%016Lx "
+				"on port 0x%016Lx closed\n",
+				(unsigned long long)unit->fcp_lun,
+				(unsigned long long)unit->port->wwpn);
 			zfcp_erp_unit_shutdown(unit, 0, 131, req);
 			retval = -EINVAL;
 		}
@@ -2322,7 +2382,7 @@
 	zfcp_fsf_req_free(req);
 	scsi_cmnd->host_scribble = NULL;
 out:
-	spin_unlock(&adapter->req_q.lock);
+	spin_unlock(&adapter->req_q_lock);
 	return retval;
 }
 
@@ -2338,7 +2398,7 @@
 					   struct zfcp_unit *unit,
 					   u8 tm_flags, int req_flags)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *req = NULL;
 	struct fcp_cmnd_iu *fcp_cmnd_iu;
 
@@ -2346,8 +2406,8 @@
 		       ZFCP_STATUS_COMMON_UNBLOCKED)))
 		return NULL;
 
-	spin_lock(&adapter->req_q.lock);
-	if (!atomic_read(&adapter->req_q.count))
+	spin_lock(&adapter->req_q_lock);
+	if (!zfcp_fsf_sbal_available(adapter))
 		goto out;
 	req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, req_flags,
 				  adapter->pool.fsf_req_scsi);
@@ -2362,7 +2422,7 @@
 	req->qtcb->bottom.io.data_direction = FSF_DATADIR_CMND;
 	req->qtcb->bottom.io.service_class = FSF_CLASS_3;
 	req->qtcb->bottom.io.fcp_cmnd_length = 	sizeof(struct fcp_cmnd_iu) +
-						sizeof(fcp_dl_t);
+						sizeof(u32);
 
 	sbale = zfcp_qdio_sbale_req(req);
 	sbale[0].flags |= SBAL_FLAGS0_TYPE_WRITE;
@@ -2379,7 +2439,7 @@
 	zfcp_fsf_req_free(req);
 	req = NULL;
 out:
-	spin_unlock(&adapter->req_q.lock);
+	spin_unlock(&adapter->req_q_lock);
 	return req;
 }
 
@@ -2398,7 +2458,7 @@
 struct zfcp_fsf_req *zfcp_fsf_control_file(struct zfcp_adapter *adapter,
 					   struct zfcp_fsf_cfdc *fsf_cfdc)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *req = NULL;
 	struct fsf_qtcb_bottom_support *bottom;
 	int direction, retval = -EIO, bytes;
@@ -2417,7 +2477,7 @@
 		return ERR_PTR(-EINVAL);
 	}
 
-	spin_lock_bh(&adapter->req_q.lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	if (zfcp_fsf_req_sbal_get(adapter))
 		goto out;
 
@@ -2447,7 +2507,7 @@
 	zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
 	retval = zfcp_fsf_req_send(req);
 out:
-	spin_unlock_bh(&adapter->req_q.lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 
 	if (!retval) {
 		wait_event(req->completion_wq,
diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h
index bf94b4d..fd3a887 100644
--- a/drivers/s390/scsi/zfcp_fsf.h
+++ b/drivers/s390/scsi/zfcp_fsf.h
@@ -71,13 +71,6 @@
 #define FSF_MAXIMUM_NUMBER_OF_LUNS_EXCEEDED	0x00000041
 #define FSF_ELS_COMMAND_REJECTED		0x00000050
 #define FSF_GENERIC_COMMAND_REJECTED		0x00000051
-#define FSF_OPERATION_PARTIALLY_SUCCESSFUL	0x00000052
-#define FSF_AUTHORIZATION_FAILURE		0x00000053
-#define FSF_CFDC_ERROR_DETECTED			0x00000054
-#define FSF_CONTROL_FILE_UPDATE_ERROR		0x00000055
-#define FSF_CONTROL_FILE_TOO_LARGE		0x00000056
-#define FSF_ACCESS_CONFLICT_DETECTED		0x00000057
-#define FSF_CONFLICTS_OVERRULED			0x00000058
 #define FSF_PORT_BOXED				0x00000059
 #define FSF_LUN_BOXED				0x0000005A
 #define FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE	0x0000005B
@@ -85,9 +78,7 @@
 #define FSF_REQUEST_SIZE_TOO_LARGE		0x00000061
 #define FSF_RESPONSE_SIZE_TOO_LARGE		0x00000062
 #define FSF_SBAL_MISMATCH			0x00000063
-#define FSF_OPEN_PORT_WITHOUT_PRLI		0x00000064
 #define FSF_ADAPTER_STATUS_AVAILABLE		0x000000AD
-#define FSF_FCP_RSP_AVAILABLE			0x000000AF
 #define FSF_UNKNOWN_COMMAND			0x000000E2
 #define FSF_UNKNOWN_OP_SUBTYPE                  0x000000E3
 #define FSF_INVALID_COMMAND_OPTION              0x000000E5
@@ -102,20 +93,9 @@
 #define FSF_SQ_RETRY_IF_POSSIBLE		0x02
 #define FSF_SQ_ULP_DEPENDENT_ERP_REQUIRED	0x03
 #define FSF_SQ_INVOKE_LINK_TEST_PROCEDURE	0x04
-#define FSF_SQ_ULP_PROGRAMMING_ERROR		0x05
 #define FSF_SQ_COMMAND_ABORTED			0x06
 #define FSF_SQ_NO_RETRY_POSSIBLE		0x07
 
-/* FSF status qualifier for CFDC commands */
-#define FSF_SQ_CFDC_HARDENED_ON_SE		0x00000000
-#define FSF_SQ_CFDC_COULD_NOT_HARDEN_ON_SE	0x00000001
-#define FSF_SQ_CFDC_COULD_NOT_HARDEN_ON_SE2	0x00000002
-/* CFDC subtable codes */
-#define FSF_SQ_CFDC_SUBTABLE_OS			0x0001
-#define FSF_SQ_CFDC_SUBTABLE_PORT_WWPN		0x0002
-#define FSF_SQ_CFDC_SUBTABLE_PORT_DID		0x0003
-#define FSF_SQ_CFDC_SUBTABLE_LUN		0x0004
-
 /* FSF status qualifier (most significant 4 bytes), local link down */
 #define FSF_PSQ_LINK_NO_LIGHT			0x00000004
 #define FSF_PSQ_LINK_WRAP_PLUG			0x00000008
@@ -145,7 +125,6 @@
 #define FSF_STATUS_READ_LINK_UP          	0x00000006
 #define FSF_STATUS_READ_NOTIFICATION_LOST	0x00000009
 #define FSF_STATUS_READ_CFDC_UPDATED		0x0000000A
-#define FSF_STATUS_READ_CFDC_HARDENED		0x0000000B
 #define FSF_STATUS_READ_FEATURE_UPDATE_ALERT	0x0000000C
 
 /* status subtypes in status read buffer */
@@ -159,20 +138,9 @@
 
 /* status subtypes for unsolicited status notification lost */
 #define FSF_STATUS_READ_SUB_INCOMING_ELS	0x00000001
-#define FSF_STATUS_READ_SUB_SENSE_DATA		0x00000002
-#define FSF_STATUS_READ_SUB_LINK_STATUS		0x00000004
-#define FSF_STATUS_READ_SUB_PORT_CLOSED		0x00000008
-#define FSF_STATUS_READ_SUB_BIT_ERROR_THRESHOLD	0x00000010
 #define FSF_STATUS_READ_SUB_ACT_UPDATED		0x00000020
-#define FSF_STATUS_READ_SUB_ACT_HARDENED	0x00000040
-#define FSF_STATUS_READ_SUB_FEATURE_UPDATE_ALERT 0x00000080
-
-/* status subtypes for CFDC */
-#define FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE	0x00000002
-#define FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE2 0x0000000F
 
 /* topologie that is detected by the adapter */
-#define FSF_TOPO_ERROR				0x00000000
 #define FSF_TOPO_P2P				0x00000001
 #define FSF_TOPO_FABRIC				0x00000002
 #define FSF_TOPO_AL				0x00000003
@@ -180,17 +148,13 @@
 /* data direction for FCP commands */
 #define FSF_DATADIR_WRITE			0x00000001
 #define FSF_DATADIR_READ			0x00000002
-#define FSF_DATADIR_READ_WRITE			0x00000003
 #define FSF_DATADIR_CMND			0x00000004
 
 /* fc service class */
-#define FSF_CLASS_1				0x00000001
-#define FSF_CLASS_2				0x00000002
 #define FSF_CLASS_3				0x00000003
 
 /* SBAL chaining */
 #define FSF_MAX_SBALS_PER_REQ			36
-#define FSF_MAX_SBALS_PER_ELS_REQ		2
 
 /* logging space behind QTCB */
 #define FSF_QTCB_LOG_SIZE			1024
@@ -200,50 +164,16 @@
 #define FSF_FEATURE_LUN_SHARING			0x00000004
 #define FSF_FEATURE_NOTIFICATION_LOST		0x00000008
 #define FSF_FEATURE_HBAAPI_MANAGEMENT           0x00000010
-#define FSF_FEATURE_ELS_CT_CHAINED_SBALS        0x00000020
 #define FSF_FEATURE_UPDATE_ALERT		0x00000100
 #define FSF_FEATURE_MEASUREMENT_DATA		0x00000200
 
 /* host connection features */
 #define FSF_FEATURE_NPIV_MODE			0x00000001
-#define FSF_FEATURE_VM_ASSIGNED_WWPN		0x00000002
 
 /* option */
 #define FSF_OPEN_LUN_SUPPRESS_BOXING		0x00000001
-#define FSF_OPEN_LUN_REPLICATE_SENSE		0x00000002
-
-/* adapter types */
-#define FSF_ADAPTER_TYPE_FICON                  0x00000001
-#define FSF_ADAPTER_TYPE_FICON_EXPRESS          0x00000002
-
-/* port types */
-#define FSF_HBA_PORTTYPE_UNKNOWN		0x00000001
-#define FSF_HBA_PORTTYPE_NOTPRESENT		0x00000003
-#define FSF_HBA_PORTTYPE_NPORT			0x00000005
-#define FSF_HBA_PORTTYPE_PTP			0x00000021
-/* following are not defined and used by FSF Spec
-   but are additionally defined by FC-HBA */
-#define FSF_HBA_PORTTYPE_OTHER			0x00000002
-#define FSF_HBA_PORTTYPE_NOTPRESENT		0x00000003
-#define FSF_HBA_PORTTYPE_NLPORT			0x00000006
-#define FSF_HBA_PORTTYPE_FLPORT			0x00000007
-#define FSF_HBA_PORTTYPE_FPORT			0x00000008
-#define FSF_HBA_PORTTYPE_LPORT			0x00000020
-
-/* port states */
-#define FSF_HBA_PORTSTATE_UNKNOWN		0x00000001
-#define FSF_HBA_PORTSTATE_ONLINE		0x00000002
-#define FSF_HBA_PORTSTATE_OFFLINE		0x00000003
-#define FSF_HBA_PORTSTATE_LINKDOWN		0x00000006
-#define FSF_HBA_PORTSTATE_ERROR			0x00000007
-
-/* IO states of adapter */
-#define FSF_IOSTAT_NPORT_RJT			0x00000004
-#define FSF_IOSTAT_FABRIC_RJT			0x00000005
-#define FSF_IOSTAT_LS_RJT			0x00000009
 
 /* open LUN access flags*/
-#define FSF_UNIT_ACCESS_OPEN_LUN_ALLOWED	0x01000000
 #define FSF_UNIT_ACCESS_EXCLUSIVE		0x02000000
 #define FSF_UNIT_ACCESS_OUTBOUND_TRANSFER	0x10000000
 
@@ -265,11 +195,6 @@
 	u32 res1;
 } __attribute__ ((packed));
 
-struct fsf_port_closed_payload {
-	struct fsf_queue_designator queue_designator;
-	u32                         port_handle;
-} __attribute__ ((packed));
-
 struct fsf_bit_error_payload {
 	u32 res1;
 	u32 link_failure_error_count;
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 69d632d..3e05080 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -28,7 +28,7 @@
 	return 0;
 }
 
-static volatile struct qdio_buffer_element *
+static struct qdio_buffer_element *
 zfcp_qdio_sbale(struct zfcp_qdio_queue *q, int sbal_idx, int sbale_idx)
 {
 	return &q->sbal[sbal_idx]->element[sbale_idx];
@@ -57,7 +57,7 @@
 
 static void zfcp_qdio_handler_error(struct zfcp_adapter *adapter, u8 id)
 {
-	dev_warn(&adapter->ccw_device->dev, "QDIO problem occurred.\n");
+	dev_warn(&adapter->ccw_device->dev, "A QDIO problem occurred\n");
 
 	zfcp_erp_adapter_reopen(adapter,
 				ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
@@ -145,7 +145,7 @@
 {
 	struct zfcp_adapter *adapter = (struct zfcp_adapter *) parm;
 	struct zfcp_qdio_queue *queue = &adapter->resp_q;
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	int sbal_idx, sbale_idx, sbal_no;
 
 	if (unlikely(qdio_err)) {
@@ -174,8 +174,8 @@
 
 		if (unlikely(!(sbale->flags & SBAL_FLAGS_LAST_ENTRY)))
 			dev_warn(&adapter->ccw_device->dev,
-				 "Protocol violation by adapter. "
-				 "Continuing operations.\n");
+				 "A QDIO protocol error occurred, "
+				 "operations continue\n");
 	}
 
 	/*
@@ -190,8 +190,7 @@
  * @fsf_req: pointer to struct fsf_req
  * Returns: pointer to qdio_buffer_element (SBALE) structure
  */
-volatile struct qdio_buffer_element *
-zfcp_qdio_sbale_req(struct zfcp_fsf_req *req)
+struct qdio_buffer_element *zfcp_qdio_sbale_req(struct zfcp_fsf_req *req)
 {
 	return zfcp_qdio_sbale(&req->adapter->req_q, req->sbal_last, 0);
 }
@@ -201,8 +200,7 @@
  * @fsf_req: pointer to struct fsf_req
  * Returns: pointer to qdio_buffer_element (SBALE) structure
  */
-volatile struct qdio_buffer_element *
-zfcp_qdio_sbale_curr(struct zfcp_fsf_req *req)
+struct qdio_buffer_element *zfcp_qdio_sbale_curr(struct zfcp_fsf_req *req)
 {
 	return zfcp_qdio_sbale(&req->adapter->req_q, req->sbal_last,
 			       req->sbale_curr);
@@ -216,10 +214,10 @@
 					% QDIO_MAX_BUFFERS_PER_Q;
 }
 
-static volatile struct qdio_buffer_element *
+static struct qdio_buffer_element *
 zfcp_qdio_sbal_chain(struct zfcp_fsf_req *fsf_req, unsigned long sbtype)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 
 	/* set last entry flag in current SBALE of current SBAL */
 	sbale = zfcp_qdio_sbale_curr(fsf_req);
@@ -250,7 +248,7 @@
 	return sbale;
 }
 
-static volatile struct qdio_buffer_element *
+static struct qdio_buffer_element *
 zfcp_qdio_sbale_next(struct zfcp_fsf_req *fsf_req, unsigned long sbtype)
 {
 	if (fsf_req->sbale_curr == ZFCP_LAST_SBALE_PER_SBAL)
@@ -273,7 +271,7 @@
 				unsigned int sbtype, void *start_addr,
 				unsigned int total_length)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	unsigned long remaining, length;
 	void *addr;
 
@@ -282,6 +280,7 @@
 	     addr += length, remaining -= length) {
 		sbale = zfcp_qdio_sbale_next(fsf_req, sbtype);
 		if (!sbale) {
+			atomic_inc(&fsf_req->adapter->qdio_outb_full);
 			zfcp_qdio_undo_sbals(fsf_req);
 			return -EINVAL;
 		}
@@ -307,7 +306,7 @@
 int zfcp_qdio_sbals_from_sg(struct zfcp_fsf_req *fsf_req, unsigned long sbtype,
 			    struct scatterlist *sg, int max_sbals)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	int retval, bytes = 0;
 
 	/* figure out last allowed SBAL */
@@ -344,10 +343,10 @@
 	int first = fsf_req->sbal_first;
 	int count = fsf_req->sbal_number;
 	int retval, pci, pci_batch;
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 
 	/* acknowledgements for transferred buffers */
-	pci_batch = req_q->pci_batch + count;
+	pci_batch = adapter->req_q_pci_batch + count;
 	if (unlikely(pci_batch >= ZFCP_QDIO_PCI_INTERVAL)) {
 		pci_batch %= ZFCP_QDIO_PCI_INTERVAL;
 		pci = first + count - (pci_batch + 1);
@@ -367,7 +366,7 @@
 	atomic_sub(count, &req_q->count);
 	req_q->first += count;
 	req_q->first %= QDIO_MAX_BUFFERS_PER_Q;
-	req_q->pci_batch = pci_batch;
+	adapter->req_q_pci_batch = pci_batch;
 	return 0;
 }
 
@@ -418,14 +417,14 @@
 	struct zfcp_qdio_queue *req_q;
 	int first, count;
 
-	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status))
+	if (!(atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP))
 		return;
 
 	/* clear QDIOUP flag, thus do_QDIO is not called during qdio_shutdown */
 	req_q = &adapter->req_q;
-	spin_lock_bh(&req_q->lock);
+	spin_lock_bh(&adapter->req_q_lock);
 	atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
-	spin_unlock_bh(&req_q->lock);
+	spin_unlock_bh(&adapter->req_q_lock);
 
 	qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR);
 
@@ -438,7 +437,7 @@
 	}
 	req_q->first = 0;
 	atomic_set(&req_q->count, 0);
-	req_q->pci_batch = 0;
+	adapter->req_q_pci_batch = 0;
 	adapter->resp_q.first = 0;
 	atomic_set(&adapter->resp_q.count, 0);
 }
@@ -450,23 +449,17 @@
  */
 int zfcp_qdio_open(struct zfcp_adapter *adapter)
 {
-	volatile struct qdio_buffer_element *sbale;
+	struct qdio_buffer_element *sbale;
 	int cc;
 
-	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status))
+	if (atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)
 		return -EIO;
 
-	if (qdio_establish(&adapter->qdio_init_data)) {
-		dev_err(&adapter->ccw_device->dev,
-			 "Establish of QDIO queues failed.\n");
-		return -EIO;
-	}
+	if (qdio_establish(&adapter->qdio_init_data))
+		goto failed_establish;
 
-	if (qdio_activate(adapter->ccw_device)) {
-		dev_err(&adapter->ccw_device->dev,
-			 "Activate of QDIO queues failed.\n");
+	if (qdio_activate(adapter->ccw_device))
 		goto failed_qdio;
-	}
 
 	for (cc = 0; cc < QDIO_MAX_BUFFERS_PER_Q; cc++) {
 		sbale = &(adapter->resp_q.sbal[cc]->element[0]);
@@ -476,20 +469,20 @@
 	}
 
 	if (do_QDIO(adapter->ccw_device, QDIO_FLAG_SYNC_INPUT, 0, 0,
-		     QDIO_MAX_BUFFERS_PER_Q)) {
-		dev_err(&adapter->ccw_device->dev,
-			 "Init of QDIO response queue failed.\n");
+		     QDIO_MAX_BUFFERS_PER_Q))
 		goto failed_qdio;
-	}
 
 	/* set index of first avalable SBALS / number of available SBALS */
 	adapter->req_q.first = 0;
 	atomic_set(&adapter->req_q.count, QDIO_MAX_BUFFERS_PER_Q);
-	adapter->req_q.pci_batch = 0;
+	adapter->req_q_pci_batch = 0;
 
 	return 0;
 
 failed_qdio:
 	qdio_shutdown(adapter->ccw_device, QDIO_FLAG_CLEANUP_USING_CLEAR);
+failed_establish:
+	dev_err(&adapter->ccw_device->dev,
+		"Setting up the QDIO connection to the FCP adapter failed\n");
 	return -EIO;
 }
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index aeae56b..ca8f85f 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -21,20 +21,6 @@
 	return fcp_sns_info_ptr;
 }
 
-void zfcp_set_fcp_dl(struct fcp_cmnd_iu *fcp_cmd, fcp_dl_t fcp_dl)
-{
-	fcp_dl_t *fcp_dl_ptr;
-
-	/*
-	 * fcp_dl_addr = start address of fcp_cmnd structure +
-	 * size of fixed part + size of dynamically sized add_dcp_cdb field
-	 * SEE FCP-2 documentation
-	 */
-	fcp_dl_ptr = (fcp_dl_t *) ((unsigned char *) &fcp_cmd[1] +
-				   (fcp_cmd->add_fcp_cdb_length << 2));
-	*fcp_dl_ptr = fcp_dl;
-}
-
 static void zfcp_scsi_slave_destroy(struct scsi_device *sdpnt)
 {
 	struct zfcp_unit *unit = (struct zfcp_unit *) sdpnt->hostdata;
@@ -119,13 +105,17 @@
 {
 	struct zfcp_port *port;
 	struct zfcp_unit *unit;
+	int scsi_lun;
 
 	list_for_each_entry(port, &adapter->port_list_head, list) {
 		if (!port->rport || (id != port->rport->scsi_target_id))
 			continue;
-		list_for_each_entry(unit, &port->unit_list_head, list)
-			if (lun == unit->scsi_lun)
+		list_for_each_entry(unit, &port->unit_list_head, list) {
+			scsi_lun = scsilun_to_int(
+				(struct scsi_lun *)&unit->fcp_lun);
+			if (lun == scsi_lun)
 				return unit;
+		}
 	}
 
 	return NULL;
@@ -183,7 +173,6 @@
 		return retval;
 	}
 	fsf_req->data = NULL;
-	fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING;
 
 	/* don't access old fsf_req after releasing the abort_lock */
 	write_unlock_irqrestore(&adapter->abort_lock, flags);
@@ -294,7 +283,8 @@
 					     sizeof (struct zfcp_adapter *));
 	if (!adapter->scsi_host) {
 		dev_err(&adapter->ccw_device->dev,
-			"registration with SCSI stack failed.");
+			"Registering the FCP device with the "
+			"SCSI stack failed\n");
 		return -EIO;
 	}
 
@@ -312,7 +302,6 @@
 		scsi_host_put(adapter->scsi_host);
 		return -EIO;
 	}
-	atomic_set_mask(ZFCP_STATUS_ADAPTER_REGISTERED, &adapter->status);
 
 	return 0;
 }
@@ -336,7 +325,6 @@
 	scsi_remove_host(shost);
 	scsi_host_put(shost);
 	adapter->scsi_host = NULL;
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_REGISTERED, &adapter->status);
 
 	return;
 }
diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c
index 2e85c6c..2809d78 100644
--- a/drivers/s390/scsi/zfcp_sysfs.c
+++ b/drivers/s390/scsi/zfcp_sysfs.c
@@ -26,9 +26,9 @@
 ZFCP_DEFINE_ATTR(zfcp_adapter, adapter, status, "0x%08x\n",
 		 atomic_read(&adapter->status));
 ZFCP_DEFINE_ATTR(zfcp_adapter, adapter, peer_wwnn, "0x%016llx\n",
-		 adapter->peer_wwnn);
+		 (unsigned long long) adapter->peer_wwnn);
 ZFCP_DEFINE_ATTR(zfcp_adapter, adapter, peer_wwpn, "0x%016llx\n",
-		 adapter->peer_wwpn);
+		 (unsigned long long) adapter->peer_wwpn);
 ZFCP_DEFINE_ATTR(zfcp_adapter, adapter, peer_d_id, "0x%06x\n",
 		 adapter->peer_d_id);
 ZFCP_DEFINE_ATTR(zfcp_adapter, adapter, card_version, "0x%04x\n",
@@ -135,8 +135,9 @@
 {
 	struct zfcp_adapter *adapter = dev_get_drvdata(dev);
 	struct zfcp_port *port;
-	wwn_t wwpn;
+	u64 wwpn;
 	int retval = 0;
+	LIST_HEAD(port_remove_lh);
 
 	down(&zfcp_data.config_sema);
 	if (atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_REMOVE) {
@@ -144,7 +145,7 @@
 		goto out;
 	}
 
-	if (strict_strtoull(buf, 0, &wwpn)) {
+	if (strict_strtoull(buf, 0, (unsigned long long *) &wwpn)) {
 		retval = -EINVAL;
 		goto out;
 	}
@@ -154,7 +155,7 @@
 	if (port && (atomic_read(&port->refcount) == 0)) {
 		zfcp_port_get(port);
 		atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &port->status);
-		list_move(&port->list, &adapter->port_remove_lh);
+		list_move(&port->list, &port_remove_lh);
 	} else
 		port = NULL;
 	write_unlock_irq(&zfcp_data.config_lock);
@@ -200,7 +201,7 @@
 {
 	struct zfcp_port *port = dev_get_drvdata(dev);
 	struct zfcp_unit *unit;
-	fcp_lun_t fcp_lun;
+	u64 fcp_lun;
 	int retval = -EINVAL;
 
 	down(&zfcp_data.config_sema);
@@ -209,7 +210,7 @@
 		goto out;
 	}
 
-	if (strict_strtoull(buf, 0, &fcp_lun))
+	if (strict_strtoull(buf, 0, (unsigned long long *) &fcp_lun))
 		goto out;
 
 	unit = zfcp_unit_enqueue(port, fcp_lun);
@@ -233,8 +234,9 @@
 {
 	struct zfcp_port *port = dev_get_drvdata(dev);
 	struct zfcp_unit *unit;
-	fcp_lun_t fcp_lun;
+	u64 fcp_lun;
 	int retval = 0;
+	LIST_HEAD(unit_remove_lh);
 
 	down(&zfcp_data.config_sema);
 	if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_REMOVE) {
@@ -242,7 +244,7 @@
 		goto out;
 	}
 
-	if (strict_strtoull(buf, 0, &fcp_lun)) {
+	if (strict_strtoull(buf, 0, (unsigned long long *) &fcp_lun)) {
 		retval = -EINVAL;
 		goto out;
 	}
@@ -252,7 +254,7 @@
 	if (unit && (atomic_read(&unit->refcount) == 0)) {
 		zfcp_unit_get(unit);
 		atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &unit->status);
-		list_move(&unit->list, &port->unit_remove_lh);
+		list_move(&unit->list, &unit_remove_lh);
 	} else
 		unit = NULL;
 
@@ -273,22 +275,7 @@
 }
 static DEVICE_ATTR(unit_remove, S_IWUSR, NULL, zfcp_sysfs_unit_remove_store);
 
-static struct attribute *zfcp_port_ns_attrs[] = {
-	&dev_attr_port_failed.attr,
-	&dev_attr_port_in_recovery.attr,
-	&dev_attr_port_status.attr,
-	&dev_attr_port_access_denied.attr,
-	NULL
-};
-
-/**
- * zfcp_sysfs_ns_port_attrs - sysfs attributes for nameserver
- */
-struct attribute_group zfcp_sysfs_ns_port_attrs = {
-	.attrs = zfcp_port_ns_attrs,
-};
-
-static struct attribute *zfcp_port_no_ns_attrs[] = {
+static struct attribute *zfcp_port_attrs[] = {
 	&dev_attr_unit_add.attr,
 	&dev_attr_unit_remove.attr,
 	&dev_attr_port_failed.attr,
@@ -302,7 +289,7 @@
  * zfcp_sysfs_port_attrs - sysfs attributes for all other ports
  */
 struct attribute_group zfcp_sysfs_port_attrs = {
-	.attrs = zfcp_port_no_ns_attrs,
+	.attrs = zfcp_port_attrs,
 };
 
 static struct attribute *zfcp_unit_attrs[] = {
@@ -395,8 +382,10 @@
 
 ZFCP_DEFINE_SCSI_ATTR(hba_id, "%s\n",
 	unit->port->adapter->ccw_device->dev.bus_id);
-ZFCP_DEFINE_SCSI_ATTR(wwpn, "0x%016llx\n", unit->port->wwpn);
-ZFCP_DEFINE_SCSI_ATTR(fcp_lun, "0x%016llx\n", unit->fcp_lun);
+ZFCP_DEFINE_SCSI_ATTR(wwpn, "0x%016llx\n",
+		      (unsigned long long) unit->port->wwpn);
+ZFCP_DEFINE_SCSI_ATTR(fcp_lun, "0x%016llx\n",
+		      (unsigned long long) unit->fcp_lun);
 
 struct device_attribute *zfcp_sysfs_sdev_attrs[] = {
 	&dev_attr_fcp_lun,
@@ -487,10 +476,23 @@
 ZFCP_SHOST_ATTR(seconds_active, "%llu\n",
 		(unsigned long long) stat_info.seconds_act);
 
+static ssize_t zfcp_sysfs_adapter_q_full_show(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	struct Scsi_Host *scsi_host = class_to_shost(dev);
+	struct zfcp_adapter *adapter =
+		(struct zfcp_adapter *) scsi_host->hostdata[0];
+
+	return sprintf(buf, "%d\n", atomic_read(&adapter->qdio_outb_full));
+}
+static DEVICE_ATTR(queue_full, S_IRUGO, zfcp_sysfs_adapter_q_full_show, NULL);
+
 struct device_attribute *zfcp_sysfs_shost_attrs[] = {
 	&dev_attr_utilization,
 	&dev_attr_requests,
 	&dev_attr_megabytes,
 	&dev_attr_seconds_active,
+	&dev_attr_queue_full,
 	NULL
 };
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 4e0322b..d3b211a 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1325,14 +1325,6 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called qlogicfas.
 
-config SCSI_QLOGIC_FC_FIRMWARE
-	bool "Include loadable firmware in driver"
-	depends on SCSI_QLOGIC_FC
-  	help
-	  Say Y to include ISP2X00 Fabric Initiator/Target Firmware, with
-	  expanded LUN addressing and FcTape (FCP-2) support, in the
-	  qlogicfc driver. This is required on some platforms.
-
 config SCSI_QLOGIC_1280
 	tristate "Qlogic QLA 1240/1x80/1x160 SCSI support"
 	depends on PCI && SCSI
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index aa4e77c..8abfd06 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -1139,7 +1139,7 @@
 	srbcmd->id       = cpu_to_le32(scmd_id(cmd));
 	srbcmd->lun      = cpu_to_le32(cmd->device->lun);
 	srbcmd->flags    = cpu_to_le32(flag);
-	timeout = cmd->timeout_per_command/HZ;
+	timeout = cmd->request->timeout/HZ;
 	if (timeout == 0)
 		timeout = 1;
 	srbcmd->timeout  = cpu_to_le32(timeout);  // timeout in seconds
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index ef693e84..8f45570 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -84,7 +84,7 @@
 	/*
 	 * I/O buffer for both MODE_SELECT and INQUIRY commands.
 	 */
-	char buffer[CLARIION_BUFFER_SIZE];
+	unsigned char buffer[CLARIION_BUFFER_SIZE];
 	/*
 	 * SCSI sense buffer for commands -- assumes serial issuance
 	 * and completion sequence of all commands for same multipath.
@@ -176,7 +176,7 @@
 		err = SCSI_DH_DEV_TEMP_BUSY;
 		goto out;
 	}
-	if (csdev->buffer[4] < 0 || csdev->buffer[4] > 2) {
+	if (csdev->buffer[4] > 2) {
 		/* Invalid buffer format */
 		sdev_printk(KERN_NOTICE, sdev,
 			    "%s: invalid VPD page 0xC0 format\n",
@@ -278,7 +278,6 @@
 		return NULL;
 	}
 
-	memset(rq->cmd, 0, BLK_MAX_CDB);
 	rq->cmd_len = COMMAND_SIZE(cmd);
 	rq->cmd[0] = cmd;
 
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index a6a4ef3..5e93c88a 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -114,7 +114,6 @@
 	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_FAILFAST;
 	req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
-	memset(req->cmd, 0, MAX_COMMAND_SIZE);
 	req->cmd[0] = TEST_UNIT_READY;
 	req->timeout = HP_SW_TIMEOUT;
 	req->sense = h->sense;
@@ -207,7 +206,6 @@
 	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_FAILFAST;
 	req->cmd_len = COMMAND_SIZE(START_STOP);
-	memset(req->cmd, 0, MAX_COMMAND_SIZE);
 	req->cmd[0] = START_STOP;
 	req->cmd[4] = 1;	/* Start spin cycle */
 	req->timeout = HP_SW_TIMEOUT;
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 6e2f130..50bf95f 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -225,8 +225,6 @@
 		return NULL;
 	}
 
-	memset(rq->cmd, 0, BLK_MAX_CDB);
-
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
 	rq->retries = RDAC_RETRIES;
@@ -590,6 +588,8 @@
 	{"STK", "OPENstorage D280"},
 	{"SUN", "CSM200_R"},
 	{"SUN", "LCSM100_F"},
+	{"DELL", "MD3000"},
+	{"DELL", "MD3000i"},
 	{NULL, NULL},
 };
 
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 822d521..c387c15 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -464,7 +464,6 @@
 
     /* use request field to save the ptr. to completion struct. */
     scp->request = (struct request *)&wait;
-    scp->timeout_per_command = timeout*HZ;
     scp->cmd_len = 12;
     scp->cmnd = cmnd;
     cmndinfo.priority = IOCTL_PRI;
@@ -1995,23 +1994,12 @@
     register Scsi_Cmnd *pscp;
     register Scsi_Cmnd *nscp;
     ulong flags;
-    unchar b, t;
 
     TRACE(("gdth_putq() priority %d\n",priority));
     spin_lock_irqsave(&ha->smp_lock, flags);
 
-    if (!cmndinfo->internal_command) {
+    if (!cmndinfo->internal_command)
         cmndinfo->priority = priority;
-        b = scp->device->channel;
-        t = scp->device->id;
-        if (priority >= DEFAULT_PRI) {
-            if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha,b)].lock) ||
-                (b==ha->virt_bus && t<MAX_HDRIVES && ha->hdr[t].lock)) {
-                TRACE2(("gdth_putq(): locked IO ->update_timeout()\n"));
-                cmndinfo->timeout = gdth_update_timeout(scp, 0);
-            }
-        }
-    }
 
     if (ha->req_first==NULL) {
         ha->req_first = scp;                    /* queue was empty */
@@ -3899,6 +3887,39 @@
     return ((const char *)ha->binfo.type_string);
 }
 
+static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp)
+{
+	gdth_ha_str *ha = shost_priv(scp->device->host);
+	struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
+	unchar b, t;
+	ulong flags;
+	enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED;
+
+	TRACE(("%s() cmd 0x%x\n", scp->cmnd[0], __func__));
+	b = scp->device->channel;
+	t = scp->device->id;
+
+	/*
+	 * We don't really honor the command timeout, but we try to
+	 * honor 6 times of the actual command timeout! So reset the
+	 * timer if this is less than 6th timeout on this command!
+	 */
+	if (++cmndinfo->timeout_count < 6)
+		retval = BLK_EH_RESET_TIMER;
+
+	/* Reset the timeout if it is locked IO */
+	spin_lock_irqsave(&ha->smp_lock, flags);
+	if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha, b)].lock) ||
+	    (b == ha->virt_bus && t < MAX_HDRIVES && ha->hdr[t].lock)) {
+		TRACE2(("%s(): locked IO, reset timeout\n", __func__));
+		retval = BLK_EH_RESET_TIMER;
+	}
+	spin_unlock_irqrestore(&ha->smp_lock, flags);
+
+	return retval;
+}
+
+
 static int gdth_eh_bus_reset(Scsi_Cmnd *scp)
 {
     gdth_ha_str *ha = shost_priv(scp->device->host);
@@ -3992,7 +4013,7 @@
     BUG_ON(!cmndinfo);
 
     scp->scsi_done = done;
-    gdth_update_timeout(scp, scp->timeout_per_command * 6);
+    cmndinfo->timeout_count = 0;
     cmndinfo->priority = DEFAULT_PRI;
 
     return __gdth_queuecommand(ha, scp, cmndinfo);
@@ -4096,12 +4117,10 @@
             ha->hdr[j].lock = 1;
             spin_unlock_irqrestore(&ha->smp_lock, flags);
             gdth_wait_completion(ha, ha->bus_cnt, j);
-            gdth_stop_timeout(ha, ha->bus_cnt, j);
         } else {
             spin_lock_irqsave(&ha->smp_lock, flags);
             ha->hdr[j].lock = 0;
             spin_unlock_irqrestore(&ha->smp_lock, flags);
-            gdth_start_timeout(ha, ha->bus_cnt, j);
             gdth_next(ha);
         }
     } 
@@ -4539,18 +4558,14 @@
                 spin_lock_irqsave(&ha->smp_lock, flags);
                 ha->raw[i].lock = 1;
                 spin_unlock_irqrestore(&ha->smp_lock, flags);
-                for (j = 0; j < ha->tid_cnt; ++j) {
+		for (j = 0; j < ha->tid_cnt; ++j)
                     gdth_wait_completion(ha, i, j);
-                    gdth_stop_timeout(ha, i, j);
-                }
             } else {
                 spin_lock_irqsave(&ha->smp_lock, flags);
                 ha->raw[i].lock = 0;
                 spin_unlock_irqrestore(&ha->smp_lock, flags);
-                for (j = 0; j < ha->tid_cnt; ++j) {
-                    gdth_start_timeout(ha, i, j);
+		for (j = 0; j < ha->tid_cnt; ++j)
                     gdth_next(ha);
-                }
             }
         } 
         break;
@@ -4644,6 +4659,7 @@
         .slave_configure        = gdth_slave_configure,
         .bios_param             = gdth_bios_param,
         .proc_info              = gdth_proc_info,
+	.eh_timed_out		= gdth_timed_out,
         .proc_name              = "gdth",
         .can_queue              = GDTH_MAXCMDS,
         .this_id                = -1,
diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h
index ca92476..1646444 100644
--- a/drivers/scsi/gdth.h
+++ b/drivers/scsi/gdth.h
@@ -916,7 +916,7 @@
         gdth_cmd_str *internal_cmd_str;         /* crier for internal messages*/
         dma_addr_t sense_paddr;                 /* sense dma-addr */
         unchar priority;
-        int timeout;
+	int timeout_count;			/* # of timeout calls */
         volatile int wait_for_completion;
         ushort status;
         ulong32 info;
diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c
index ce0228e..59349a3 100644
--- a/drivers/scsi/gdth_proc.c
+++ b/drivers/scsi/gdth_proc.c
@@ -748,69 +748,3 @@
     }
     spin_unlock_irqrestore(&ha->smp_lock, flags);
 }
-
-static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id)
-{
-    ulong flags;
-    Scsi_Cmnd *scp;
-    unchar b, t;
-
-    spin_lock_irqsave(&ha->smp_lock, flags);
-
-    for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) {
-        struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
-        if (!cmndinfo->internal_command) {
-            b = scp->device->channel;
-            t = scp->device->id;
-            if (t == (unchar)id && b == (unchar)busnum) {
-                TRACE2(("gdth_stop_timeout(): update_timeout()\n"));
-                cmndinfo->timeout = gdth_update_timeout(scp, 0);
-            }
-        }
-    }
-    spin_unlock_irqrestore(&ha->smp_lock, flags);
-}
-
-static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id)
-{
-    ulong flags;
-    Scsi_Cmnd *scp;
-    unchar b, t;
-
-    spin_lock_irqsave(&ha->smp_lock, flags);
-
-    for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) {
-        struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
-        if (!cmndinfo->internal_command) {
-            b = scp->device->channel;
-            t = scp->device->id;
-            if (t == (unchar)id && b == (unchar)busnum) {
-                TRACE2(("gdth_start_timeout(): update_timeout()\n"));
-                gdth_update_timeout(scp, cmndinfo->timeout);
-            }
-        }
-    }
-    spin_unlock_irqrestore(&ha->smp_lock, flags);
-}
-
-static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout)
-{
-    int oldto;
-
-    oldto = scp->timeout_per_command;
-    scp->timeout_per_command = timeout;
-
-    if (timeout == 0) {
-        del_timer(&scp->eh_timeout);
-        scp->eh_timeout.data = (unsigned long) NULL;
-        scp->eh_timeout.expires = 0;
-    } else {
-        if (scp->eh_timeout.data != (unsigned long) NULL) 
-            del_timer(&scp->eh_timeout);
-        scp->eh_timeout.data = (unsigned long) scp;
-        scp->eh_timeout.expires = jiffies + timeout;
-        add_timer(&scp->eh_timeout);
-    }
-
-    return oldto;
-}
diff --git a/drivers/scsi/gdth_proc.h b/drivers/scsi/gdth_proc.h
index 45e6fda..9b900cc 100644
--- a/drivers/scsi/gdth_proc.h
+++ b/drivers/scsi/gdth_proc.h
@@ -20,9 +20,6 @@
                               ulong64 *paddr);
 static void gdth_ioctl_free(gdth_ha_str *ha, int size, char *buf, ulong64 paddr);
 static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id);
-static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id);
-static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id);
-static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout);
 
 #endif
 
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index fed0b02..3fdbb13 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -464,7 +464,7 @@
 struct Scsi_Host *scsi_host_lookup(unsigned short hostnum)
 {
 	struct device *cdev;
-	struct Scsi_Host *shost = ERR_PTR(-ENXIO);
+	struct Scsi_Host *shost = NULL;
 
 	cdev = class_find_device(&shost_class, NULL, &hostnum,
 				 __scsi_host_match);
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 7b1502c..87e09f3 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -756,7 +756,7 @@
 	init_event_struct(evt_struct,
 			  handle_cmd_rsp,
 			  VIOSRP_SRP_FORMAT,
-			  cmnd->timeout_per_command/HZ);
+			  cmnd->request->timeout/HZ);
 
 	evt_struct->cmnd = cmnd;
 	evt_struct->cmnd_done = done;
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 461331d..81c16cb 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -612,7 +612,7 @@
 	pc->req_xfer = pc->buf_size = scsi_bufflen(cmd);
 	pc->scsi_cmd = cmd;
 	pc->done = done;
-	pc->timeout = jiffies + cmd->timeout_per_command;
+	pc->timeout = jiffies + cmd->request->timeout;
 
 	if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
 		printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index e7a3a65..d30eb7b 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3670,7 +3670,8 @@
 			sdev->no_uld_attach = 1;
 		}
 		if (ipr_is_vset_device(res)) {
-			sdev->timeout = IPR_VSET_RW_TIMEOUT;
+			blk_queue_rq_timeout(sdev->request_queue,
+					     IPR_VSET_RW_TIMEOUT);
 			blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS);
 		}
 		if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res))
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index bc9e6dd..ef683f0 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -3818,7 +3818,7 @@
 		scb->cmd.dcdb.segment_4G = 0;
 		scb->cmd.dcdb.enhanced_sg = 0;
 
-		TimeOut = scb->scsi_cmd->timeout_per_command;
+		TimeOut = scb->scsi_cmd->request->timeout;
 
 		if (ha->subsys->param[4] & 0x00100000) {	/* If NEW Tape DCDB is Supported */
 			if (!scb->sg_len) {
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 299e075..da7b67d 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1456,7 +1456,7 @@
 		if (lun == task->sc->device->lun || lun == -1) {
 			debug_scsi("failing in progress sc %p itt 0x%x\n",
 				   task->sc, task->itt);
-			fail_command(conn, task, DID_BUS_BUSY << 16);
+			fail_command(conn, task, error << 16);
 		}
 	}
 }
@@ -1476,12 +1476,12 @@
 		scsi_queue_work(conn->session->host, &conn->xmitwork);
 }
 
-static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
+static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
 {
 	struct iscsi_cls_session *cls_session;
 	struct iscsi_session *session;
 	struct iscsi_conn *conn;
-	enum scsi_eh_timer_return rc = EH_NOT_HANDLED;
+	enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
 
 	cls_session = starget_to_session(scsi_target(scmd->device));
 	session = cls_session->dd_data;
@@ -1494,14 +1494,14 @@
 		 * We are probably in the middle of iscsi recovery so let
 		 * that complete and handle the error.
 		 */
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 		goto done;
 	}
 
 	conn = session->leadconn;
 	if (!conn) {
 		/* In the middle of shuting down */
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 		goto done;
 	}
 
@@ -1513,20 +1513,21 @@
 	 */
 	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
 			    (conn->ping_timeout * HZ), jiffies))
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 	/*
 	 * if we are about to check the transport then give the command
 	 * more time
 	 */
 	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ),
 			   jiffies))
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 	/* if in the middle of checking the transport then give us more time */
 	if (conn->ping_task)
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 done:
 	spin_unlock(&session->lock);
-	debug_scsi("return %s\n", rc == EH_RESET_TIMER ? "timer reset" : "nh");
+	debug_scsi("return %s\n", rc == BLK_EH_RESET_TIMER ?
+					"timer reset" : "nh");
 	return rc;
 }
 
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 48ee8c7..e155011 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -294,10 +294,10 @@
 	}
 }
 
-static int sas_ata_scr_write(struct ata_port *ap, unsigned int sc_reg_in,
+static int sas_ata_scr_write(struct ata_link *link, unsigned int sc_reg_in,
 			      u32 val)
 {
-	struct domain_device *dev = ap->private_data;
+	struct domain_device *dev = link->ap->private_data;
 
 	SAS_DPRINTK("STUB %s\n", __func__);
 	switch (sc_reg_in) {
@@ -319,10 +319,10 @@
 	return 0;
 }
 
-static int sas_ata_scr_read(struct ata_port *ap, unsigned int sc_reg_in,
+static int sas_ata_scr_read(struct ata_link *link, unsigned int sc_reg_in,
 			    u32 *val)
 {
-	struct domain_device *dev = ap->private_data;
+	struct domain_device *dev = link->ap->private_data;
 
 	SAS_DPRINTK("STUB %s\n", __func__);
 	switch (sc_reg_in) {
@@ -398,7 +398,7 @@
 
 	/* Bounce SCSI-initiated commands to the SCSI EH */
 	if (qc->scsicmd) {
-		scsi_req_abort_cmd(qc->scsicmd);
+		blk_abort_request(qc->scsicmd->request);
 		scsi_schedule_eh(qc->scsicmd->device->host);
 		return;
 	}
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index b4f9368..0001374 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -55,7 +55,7 @@
 int  sas_register_ports(struct sas_ha_struct *sas_ha);
 void sas_unregister_ports(struct sas_ha_struct *sas_ha);
 
-enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *);
+enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *);
 
 int  sas_init_queue(struct sas_ha_struct *sas_ha);
 int  sas_init_events(struct sas_ha_struct *sas_ha);
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index a8e3ef3..7448387 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -673,43 +673,43 @@
 	return;
 }
 
-enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
+enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
 {
 	struct sas_task *task = TO_SAS_TASK(cmd);
 	unsigned long flags;
 
 	if (!task) {
-		cmd->timeout_per_command /= 2;
+		cmd->request->timeout /= 2;
 		SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n",
-			    cmd, task, (cmd->timeout_per_command ?
-			    "EH_RESET_TIMER" : "EH_NOT_HANDLED"));
-		if (!cmd->timeout_per_command)
-			return EH_NOT_HANDLED;
-		return EH_RESET_TIMER;
+			    cmd, task, (cmd->request->timeout ?
+			    "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED"));
+		if (!cmd->request->timeout)
+			return BLK_EH_NOT_HANDLED;
+		return BLK_EH_RESET_TIMER;
 	}
 
 	spin_lock_irqsave(&task->task_state_lock, flags);
 	BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED);
 	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
 		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n",
-			    cmd, task);
-		return EH_HANDLED;
+		SAS_DPRINTK("command 0x%p, task 0x%p, timed out: "
+			    "BLK_EH_HANDLED\n", cmd, task);
+		return BLK_EH_HANDLED;
 	}
 	if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) {
 		spin_unlock_irqrestore(&task->task_state_lock, flags);
 		SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: "
-			    "EH_RESET_TIMER\n",
+			    "BLK_EH_RESET_TIMER\n",
 			    cmd, task);
-		return EH_RESET_TIMER;
+		return BLK_EH_RESET_TIMER;
 	}
 	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
 	spin_unlock_irqrestore(&task->task_state_lock, flags);
 
-	SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n",
+	SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n",
 		    cmd, task);
 
-	return EH_NOT_HANDLED;
+	return BLK_EH_NOT_HANDLED;
 }
 
 int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
@@ -1039,7 +1039,7 @@
 		return;
 	}
 
-	scsi_req_abort_cmd(sc);
+	blk_abort_request(sc->request);
 	scsi_schedule_eh(sc->device->host);
 }
 
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 97b7633..afe1de9 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -1167,7 +1167,7 @@
  * cmd has not been completed within the timeout period.
  */
 static enum
-scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
+blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
 {
 	struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr;
 	struct megasas_instance *instance;
@@ -1175,7 +1175,7 @@
 
 	if (time_after(jiffies, scmd->jiffies_at_alloc +
 				(MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) {
-		return EH_NOT_HANDLED;
+		return BLK_EH_NOT_HANDLED;
 	}
 
 	instance = cmd->instance;
@@ -1189,7 +1189,7 @@
 
 		spin_unlock_irqrestore(instance->host->host_lock, flags);
 	}
-	return EH_RESET_TIMER;
+	return BLK_EH_RESET_TIMER;
 }
 
 /**
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index c57c94c..3b7240e 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -4170,8 +4170,8 @@
 	**
 	**----------------------------------------------------
 	*/
-	if (np->settle_time && cmd->timeout_per_command >= HZ) {
-		u_long tlimit = jiffies + cmd->timeout_per_command - HZ;
+	if (np->settle_time && cmd->request->timeout >= HZ) {
+		u_long tlimit = jiffies + cmd->request->timeout - HZ;
 		if (time_after(np->settle_time, tlimit))
 			np->settle_time = tlimit;
 	}
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 37f9ba0..b6cd12b 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -2845,7 +2845,7 @@
 	memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8));
 
 	/* Set ISP command timeout. */
-	pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ);
+	pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ);
 
 	/* Set device target ID and LUN */
 	pkt->lun = SCSI_LUN_32(cmd);
@@ -3114,7 +3114,7 @@
 	memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8));
 
 	/* Set ISP command timeout. */
-	pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ);
+	pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ);
 
 	/* Set device target ID and LUN */
 	pkt->lun = SCSI_LUN_32(cmd);
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 45e7dcb..0ddfe71 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -292,10 +292,11 @@
 		valid = 0;
 		if (ha->optrom_size == OPTROM_SIZE_2300 && start == 0)
 			valid = 1;
-		else if (start == (FA_BOOT_CODE_ADDR*4) ||
-		    start == (FA_RISC_CODE_ADDR*4))
+		else if (start == (ha->flt_region_boot * 4) ||
+		    start == (ha->flt_region_fw * 4))
 			valid = 1;
-		else if (IS_QLA25XX(ha) && start == (FA_VPD_NVRAM_ADDR*4))
+		else if (IS_QLA25XX(ha) &&
+		    start == (ha->flt_region_vpd_nvram * 4))
 		    valid = 1;
 		if (!valid) {
 			qla_printk(KERN_WARNING, ha,
@@ -1065,6 +1066,8 @@
 		pfc_host_stat->dumped_frames = stats->dumped_frames;
 		pfc_host_stat->nos_count = stats->nos_rcvd;
 	}
+	pfc_host_stat->fcp_input_megabytes = ha->qla_stats.input_bytes >> 20;
+	pfc_host_stat->fcp_output_megabytes = ha->qla_stats.output_bytes >> 20;
 
 done_free:
         dma_pool_free(ha->s_dma_pool, stats, stats_dma);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 94a720e..83c8192 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -25,7 +25,6 @@
 #include <linux/firmware.h>
 #include <linux/aer.h>
 #include <linux/mutex.h>
-#include <linux/semaphore.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
@@ -2157,6 +2156,8 @@
 
 struct qla_statistics {
 	uint32_t total_isp_aborts;
+	uint64_t input_bytes;
+	uint64_t output_bytes;
 };
 
 /*
@@ -2238,6 +2239,7 @@
 #define FCPORT_UPDATE_NEEDED	27
 #define VP_DPC_NEEDED		28	/* wake up for VP dpc handling */
 #define UNLOADING		29
+#define NPIV_CONFIG_NEEDED	30
 
 	uint32_t	device_flags;
 #define DFLG_LOCAL_DEVICES		BIT_0
@@ -2507,7 +2509,6 @@
 	uint64_t	fce_wr, fce_rd;
 	struct mutex	fce_mutex;
 
-	uint32_t	hw_event_start;
 	uint32_t	hw_event_ptr;
 	uint32_t	hw_event_pause_errors;
 
@@ -2553,6 +2554,14 @@
 	uint32_t	fdt_unprotect_sec_cmd;
 	uint32_t	fdt_protect_sec_cmd;
 
+	uint32_t	flt_region_flt;
+	uint32_t	flt_region_fdt;
+	uint32_t	flt_region_boot;
+	uint32_t	flt_region_fw;
+	uint32_t	flt_region_vpd_nvram;
+	uint32_t	flt_region_hw_event;
+	uint32_t	flt_region_npiv_conf;
+
 	/* Needed for BEACON */
 	uint16_t	beacon_blink_led;
 	uint8_t		beacon_color_state;
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index cf19451..d1d1420 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -789,14 +789,23 @@
 #define FA_RISC_CODE_ADDR	0x20000
 #define FA_RISC_CODE_SEGMENTS	2
 
+#define FA_FLASH_DESCR_ADDR_24	0x11000
+#define FA_FLASH_LAYOUT_ADDR_24	0x11400
+#define FA_NPIV_CONF0_ADDR_24	0x16000
+#define FA_NPIV_CONF1_ADDR_24	0x17000
+
 #define FA_FW_AREA_ADDR		0x40000
 #define FA_VPD_NVRAM_ADDR	0x48000
 #define FA_FEATURE_ADDR		0x4C000
 #define FA_FLASH_DESCR_ADDR	0x50000
+#define FA_FLASH_LAYOUT_ADDR	0x50400
 #define FA_HW_EVENT0_ADDR	0x54000
-#define FA_HW_EVENT1_ADDR	0x54200
+#define FA_HW_EVENT1_ADDR	0x54400
 #define FA_HW_EVENT_SIZE	0x200
 #define FA_HW_EVENT_ENTRY_SIZE	4
+#define FA_NPIV_CONF0_ADDR	0x5C000
+#define FA_NPIV_CONF1_ADDR	0x5D000
+
 /*
  * Flash Error Log Event Codes.
  */
@@ -806,10 +815,6 @@
 #define HW_EVENT_NVRAM_CHKSUM_ERR	0xF023
 #define HW_EVENT_FLASH_FW_ERR	0xF024
 
-#define FA_BOOT_LOG_ADDR	0x58000
-#define FA_FW_DUMP0_ADDR	0x60000
-#define FA_FW_DUMP1_ADDR	0x70000
-
 	uint32_t flash_data;		/* Flash/NVRAM BIOS data. */
 
 	uint32_t ctrl_status;		/* Control/Status. */
@@ -1203,6 +1208,62 @@
 	uint8_t unused2[65];
 };
 
+/* Flash Layout Table ********************************************************/
+
+struct qla_flt_location {
+	uint8_t sig[4];
+	uint32_t start_lo;
+	uint32_t start_hi;
+	uint16_t unused;
+	uint16_t checksum;
+};
+
+struct qla_flt_header {
+	uint16_t version;
+	uint16_t length;
+	uint16_t checksum;
+	uint16_t unused;
+};
+
+#define FLT_REG_FW		0x01
+#define FLT_REG_BOOT_CODE	0x07
+#define FLT_REG_VPD_0		0x14
+#define FLT_REG_NVRAM_0		0x15
+#define FLT_REG_VPD_1		0x16
+#define FLT_REG_NVRAM_1		0x17
+#define FLT_REG_FDT		0x1a
+#define FLT_REG_FLT		0x1c
+#define FLT_REG_HW_EVENT_0	0x1d
+#define FLT_REG_HW_EVENT_1	0x1f
+#define FLT_REG_NPIV_CONF_0	0x29
+#define FLT_REG_NPIV_CONF_1	0x2a
+
+struct qla_flt_region {
+	uint32_t code;
+	uint32_t size;
+	uint32_t start;
+	uint32_t end;
+};
+
+/* Flash NPIV Configuration Table ********************************************/
+
+struct qla_npiv_header {
+	uint8_t sig[2];
+	uint16_t version;
+	uint16_t entries;
+	uint16_t unused[4];
+	uint16_t checksum;
+};
+
+struct qla_npiv_entry {
+	uint16_t flags;
+	uint16_t vf_id;
+	uint16_t qos;
+	uint16_t unused1;
+	uint8_t port_name[WWN_SIZE];
+	uint8_t node_name[WWN_SIZE];
+};
+
 /* 84XX Support **************************************************************/
 
 #define MBA_ISP84XX_ALERT	0x800f  /* Alert Notification. */
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 0b15673..753dbe6 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -313,9 +313,11 @@
 extern int qla2xxx_hw_event_log(scsi_qla_host_t *, uint16_t , uint16_t,
     uint16_t, uint16_t);
 
-extern void qla2xxx_get_flash_info(scsi_qla_host_t *);
+extern int qla2xxx_get_flash_info(scsi_qla_host_t *);
 extern int qla2xxx_get_vpd_field(scsi_qla_host_t *, char *, char *, size_t);
 
+extern void qla2xxx_flash_npiv_conf(scsi_qla_host_t *);
+
 /*
  * Global Function Prototypes in qla_dbg.c source file.
  */
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index ee89ddd..a470f2d 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -83,6 +83,13 @@
 
 	ha->isp_ops->reset_chip(ha);
 
+	rval = qla2xxx_get_flash_info(ha);
+	if (rval) {
+		DEBUG2(printk("scsi(%ld): Unable to validate FLASH data.\n",
+		    ha->host_no));
+		return (rval);
+	}
+
 	ha->isp_ops->get_flash_version(ha, ha->request_ring);
 
 	qla_printk(KERN_INFO, ha, "Configure NVRAM parameters...\n");
@@ -109,7 +116,6 @@
 		rval = qla2x00_setup_chip(ha);
 		if (rval)
 			return (rval);
-		qla2xxx_get_flash_info(ha);
 	}
 	if (IS_QLA84XX(ha)) {
 		ha->cs84xx = qla84xx_get_chip(ha);
@@ -2016,7 +2022,7 @@
 		DEBUG3(printk("%s: exiting normally\n", __func__));
 	}
 
-	/* Restore state if a resync event occured during processing */
+	/* Restore state if a resync event occurred during processing */
 	if (test_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags)) {
 		if (test_bit(LOCAL_LOOP_UPDATE, &save_flags))
 			set_bit(LOCAL_LOOP_UPDATE, &ha->dpc_flags);
@@ -2561,7 +2567,7 @@
 	rval = QLA_SUCCESS;
 
 	/* Try GID_PT to get device list, else GAN. */
-	swl = kcalloc(MAX_FIBRE_DEVICES, sizeof(sw_info_t), GFP_ATOMIC);
+	swl = kcalloc(MAX_FIBRE_DEVICES, sizeof(sw_info_t), GFP_KERNEL);
 	if (!swl) {
 		/*EMPTY*/
 		DEBUG2(printk("scsi(%ld): GID_PT allocations failed, fallback "
@@ -3751,7 +3757,7 @@
 	rval = QLA_SUCCESS;
 
 	segments = FA_RISC_CODE_SEGMENTS;
-	faddr = FA_RISC_CODE_ADDR;
+	faddr = ha->flt_region_fw;
 	dcode = (uint32_t *)ha->request_ring;
 	*srisc_addr = 0;
 
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index 92fafbd..e90afad 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -52,7 +52,7 @@
  * @ha: HA context
  * @ha_locked: is function called with the hardware lock
  *
- * Returns non-zero if a failure occured, else zero.
+ * Returns non-zero if a failure occurred, else zero.
  */
 static inline int
 qla2x00_issue_marker(scsi_qla_host_t *ha, int ha_locked)
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index d57669a..85bc0a4 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -21,17 +21,22 @@
  * Returns the proper CF_* direction based on CDB.
  */
 static inline uint16_t
-qla2x00_get_cmd_direction(struct scsi_cmnd *cmd)
+qla2x00_get_cmd_direction(srb_t *sp)
 {
 	uint16_t cflags;
 
 	cflags = 0;
 
 	/* Set transfer direction */
-	if (cmd->sc_data_direction == DMA_TO_DEVICE)
+	if (sp->cmd->sc_data_direction == DMA_TO_DEVICE) {
 		cflags = CF_WRITE;
-	else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+		sp->fcport->ha->qla_stats.output_bytes +=
+		    scsi_bufflen(sp->cmd);
+	} else if (sp->cmd->sc_data_direction == DMA_FROM_DEVICE) {
 		cflags = CF_READ;
+		sp->fcport->ha->qla_stats.input_bytes +=
+		    scsi_bufflen(sp->cmd);
+	}
 	return (cflags);
 }
 
@@ -169,7 +174,7 @@
 
 	ha = sp->ha;
 
-	cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(cmd));
+	cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp));
 
 	/* Three DSDs are available in the Command Type 2 IOCB */
 	avail_dsds = 3;
@@ -228,7 +233,7 @@
 
 	ha = sp->ha;
 
-	cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(cmd));
+	cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp));
 
 	/* Two DSDs are available in the Command Type 3 IOCB */
 	avail_dsds = 2;
@@ -262,7 +267,7 @@
  * qla2x00_start_scsi() - Send a SCSI command to the ISP
  * @sp: command to send to the ISP
  *
- * Returns non-zero if a failure occured, else zero.
+ * Returns non-zero if a failure occurred, else zero.
  */
 int
 qla2x00_start_scsi(srb_t *sp)
@@ -407,7 +412,7 @@
  *
  * Can be called from both normal and interrupt context.
  *
- * Returns non-zero if a failure occured, else zero.
+ * Returns non-zero if a failure occurred, else zero.
  */
 int
 __qla2x00_marker(scsi_qla_host_t *ha, uint16_t loop_id, uint16_t lun,
@@ -625,12 +630,17 @@
 	ha = sp->ha;
 
 	/* Set transfer direction */
-	if (cmd->sc_data_direction == DMA_TO_DEVICE)
+	if (cmd->sc_data_direction == DMA_TO_DEVICE) {
 		cmd_pkt->task_mgmt_flags =
 		    __constant_cpu_to_le16(TMF_WRITE_DATA);
-	else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+		sp->fcport->ha->qla_stats.output_bytes +=
+		    scsi_bufflen(sp->cmd);
+	} else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
 		cmd_pkt->task_mgmt_flags =
 		    __constant_cpu_to_le16(TMF_READ_DATA);
+		sp->fcport->ha->qla_stats.input_bytes +=
+		    scsi_bufflen(sp->cmd);
+	}
 
 	/* One DSD is available in the Command Type 3 IOCB */
 	avail_dsds = 1;
@@ -666,7 +676,7 @@
  * qla24xx_start_scsi() - Send a SCSI command to the ISP
  * @sp: command to send to the ISP
  *
- * Returns non-zero if a failure occured, else zero.
+ * Returns non-zero if a failure occurred, else zero.
  */
 int
 qla24xx_start_scsi(srb_t *sp)
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index bf41887..fc4bfa7 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -391,9 +391,9 @@
 		break;
 
 	case MBA_LIP_OCCURRED:		/* Loop Initialization Procedure */
-		DEBUG2(printk("scsi(%ld): LIP occured (%x).\n", ha->host_no,
+		DEBUG2(printk("scsi(%ld): LIP occurred (%x).\n", ha->host_no,
 		    mb[1]));
-		qla_printk(KERN_INFO, ha, "LIP occured (%x).\n", mb[1]);
+		qla_printk(KERN_INFO, ha, "LIP occurred (%x).\n", mb[1]);
 
 		if (atomic_read(&ha->loop_state) != LOOP_DOWN) {
 			atomic_set(&ha->loop_state, LOOP_DOWN);
@@ -460,7 +460,7 @@
 		DEBUG2(printk("scsi(%ld): Asynchronous LIP RESET (%x).\n",
 		    ha->host_no, mb[1]));
 		qla_printk(KERN_INFO, ha,
-		    "LIP reset occured (%x).\n", mb[1]);
+		    "LIP reset occurred (%x).\n", mb[1]);
 
 		if (atomic_read(&ha->loop_state) != LOOP_DOWN) {
 			atomic_set(&ha->loop_state, LOOP_DOWN);
@@ -543,7 +543,7 @@
 
 	case MBA_PORT_UPDATE:		/* Port database update */
 		/*
-		 * If PORT UPDATE is global (recieved LIP_OCCURED/LIP_RESET
+		 * If PORT UPDATE is global (received LIP_OCCURRED/LIP_RESET
 		 * event etc. earlier indicating loop is down) then process
 		 * it.  Otherwise ignore it and Wait for RSCN to come in.
 		 */
@@ -589,7 +589,7 @@
 		    "scsi(%ld): RSCN database changed -- %04x %04x %04x.\n",
 		    ha->host_no, mb[1], mb[2], mb[3]));
 
-		rscn_entry = (mb[1] << 16) | mb[2];
+		rscn_entry = ((mb[1] & 0xff) << 16) | mb[2];
 		host_pid = (ha->d_id.b.domain << 16) | (ha->d_id.b.area << 8) |
 		    ha->d_id.b.al_pa;
 		if (rscn_entry == host_pid) {
@@ -600,6 +600,8 @@
 			break;
 		}
 
+		/* Ignore reserved bits from RSCN-payload. */
+		rscn_entry = ((mb[1] & 0x3ff) << 16) | mb[2];
 		rscn_queue_index = ha->rscn_in_ptr + 1;
 		if (rscn_queue_index == MAX_RSCN_COUNT)
 			rscn_queue_index = 0;
@@ -1060,8 +1062,9 @@
 		resid = resid_len;
 		/* Use F/W calculated residual length. */
 		if (IS_FWI2_CAPABLE(ha)) {
-			if (scsi_status & SS_RESIDUAL_UNDER &&
-			    resid != fw_resid_len) {
+			if (!(scsi_status & SS_RESIDUAL_UNDER)) {
+				lscsi_status = 0;
+			} else if (resid != fw_resid_len) {
 				scsi_status &= ~SS_RESIDUAL_UNDER;
 				lscsi_status = 0;
 			}
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 813bc77..36bc685 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -233,7 +233,7 @@
 			DEBUG2_3_11(printk("%s(%ld): timeout schedule "
 			    "isp_abort_needed.\n", __func__, ha->host_no));
 			qla_printk(KERN_WARNING, ha,
-			    "Mailbox command timeout occured. Scheduling ISP "
+			    "Mailbox command timeout occurred. Scheduling ISP "
 			    "abort.\n");
 			set_bit(ISP_ABORT_NEEDED, &ha->dpc_flags);
 			qla2xxx_wake_dpc(ha);
@@ -244,7 +244,7 @@
 			DEBUG2_3_11(printk("%s(%ld): timeout calling "
 			    "abort_isp\n", __func__, ha->host_no));
 			qla_printk(KERN_WARNING, ha,
-			    "Mailbox command timeout occured. Issuing ISP "
+			    "Mailbox command timeout occurred. Issuing ISP "
 			    "abort.\n");
 
 			set_bit(ABORT_ISP_ACTIVE, &ha->dpc_flags);
@@ -1995,7 +1995,7 @@
 	char *pmap;
 	dma_addr_t pmap_dma;
 
-	pmap = dma_pool_alloc(ha->s_dma_pool, GFP_ATOMIC, &pmap_dma);
+	pmap = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pmap_dma);
 	if (pmap  == NULL) {
 		DEBUG2_3_11(printk("%s(%ld): **** Mem Alloc Failed ****",
 		    __func__, ha->host_no));
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 6d0f0e5..3433441 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1517,6 +1517,7 @@
 	set_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags);
 	set_bit(LOCAL_LOOP_UPDATE, &ha->dpc_flags);
 	set_bit(RSCN_UPDATE, &ha->dpc_flags);
+	set_bit(NPIV_CONFIG_NEEDED, &ha->dpc_flags);
 }
 
 static int
@@ -1663,8 +1664,6 @@
 		ha->gid_list_info_size = 8;
 		ha->optrom_size = OPTROM_SIZE_25XX;
 		ha->isp_ops = &qla25xx_isp_ops;
-		ha->hw_event_start = PCI_FUNC(pdev->devfn) ?
-		    FA_HW_EVENT1_ADDR: FA_HW_EVENT0_ADDR;
 	}
 	host->can_queue = ha->request_q_length + 128;
 
@@ -2433,6 +2432,12 @@
 			    ha->host_no));
 		}
 
+		if (test_bit(NPIV_CONFIG_NEEDED, &ha->dpc_flags) &&
+		    atomic_read(&ha->loop_state) == LOOP_READY) {
+			clear_bit(NPIV_CONFIG_NEEDED, &ha->dpc_flags);
+			qla2xxx_flash_npiv_conf(ha);
+		}
+
 		if (!ha->interrupts_on)
 			ha->isp_ops->enable_intrs(ha);
 
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 1bca744..90a1321 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -543,23 +543,198 @@
 	}
 }
 
-void
-qla2xxx_get_flash_info(scsi_qla_host_t *ha)
+static int
+qla2xxx_find_flt_start(scsi_qla_host_t *ha, uint32_t *start)
+{
+	const char *loc, *locations[] = { "DEF", "PCI" };
+	uint32_t pcihdr, pcids;
+	uint32_t *dcode;
+	uint8_t *buf, *bcode, last_image;
+	uint16_t cnt, chksum, *wptr;
+	struct qla_flt_location *fltl;
+
+	/*
+	 * FLT-location structure resides after the last PCI region.
+	 */
+
+	/* Begin with sane defaults. */
+	loc = locations[0];
+	*start = IS_QLA24XX_TYPE(ha) ? FA_FLASH_LAYOUT_ADDR_24:
+	    FA_FLASH_LAYOUT_ADDR;
+
+	/* Begin with first PCI expansion ROM header. */
+	buf = (uint8_t *)ha->request_ring;
+	dcode = (uint32_t *)ha->request_ring;
+	pcihdr = 0;
+	last_image = 1;
+	do {
+		/* Verify PCI expansion ROM header. */
+		qla24xx_read_flash_data(ha, dcode, pcihdr >> 2, 0x20);
+		bcode = buf + (pcihdr % 4);
+		if (bcode[0x0] != 0x55 || bcode[0x1] != 0xaa)
+			goto end;
+
+		/* Locate PCI data structure. */
+		pcids = pcihdr + ((bcode[0x19] << 8) | bcode[0x18]);
+		qla24xx_read_flash_data(ha, dcode, pcids >> 2, 0x20);
+		bcode = buf + (pcihdr % 4);
+
+		/* Validate signature of PCI data structure. */
+		if (bcode[0x0] != 'P' || bcode[0x1] != 'C' ||
+		    bcode[0x2] != 'I' || bcode[0x3] != 'R')
+			goto end;
+
+		last_image = bcode[0x15] & BIT_7;
+
+		/* Locate next PCI expansion ROM. */
+		pcihdr += ((bcode[0x11] << 8) | bcode[0x10]) * 512;
+	} while (!last_image);
+
+	/* Now verify FLT-location structure. */
+	fltl = (struct qla_flt_location *)ha->request_ring;
+	qla24xx_read_flash_data(ha, dcode, pcihdr >> 2,
+	    sizeof(struct qla_flt_location) >> 2);
+	if (fltl->sig[0] != 'Q' || fltl->sig[1] != 'F' ||
+	    fltl->sig[2] != 'L' || fltl->sig[3] != 'T')
+		goto end;
+
+	wptr = (uint16_t *)ha->request_ring;
+	cnt = sizeof(struct qla_flt_location) >> 1;
+	for (chksum = 0; cnt; cnt--)
+		chksum += le16_to_cpu(*wptr++);
+	if (chksum) {
+		qla_printk(KERN_ERR, ha,
+		    "Inconsistent FLTL detected: checksum=0x%x.\n", chksum);
+		qla2x00_dump_buffer(buf, sizeof(struct qla_flt_location));
+		return QLA_FUNCTION_FAILED;
+	}
+
+	/* Good data.  Use specified location. */
+	loc = locations[1];
+	*start = le16_to_cpu(fltl->start_hi) << 16 |
+	    le16_to_cpu(fltl->start_lo);
+end:
+	DEBUG2(qla_printk(KERN_DEBUG, ha, "FLTL[%s] = 0x%x.\n", loc, *start));
+	return QLA_SUCCESS;
+}
+
+static void
+qla2xxx_get_flt_info(scsi_qla_host_t *ha, uint32_t flt_addr)
+{
+	const char *loc, *locations[] = { "DEF", "FLT" };
+	uint16_t *wptr;
+	uint16_t cnt, chksum;
+	uint32_t start;
+	struct qla_flt_header *flt;
+	struct qla_flt_region *region;
+
+	ha->flt_region_flt = flt_addr;
+	wptr = (uint16_t *)ha->request_ring;
+	flt = (struct qla_flt_header *)ha->request_ring;
+	region = (struct qla_flt_region *)&flt[1];
+	ha->isp_ops->read_optrom(ha, (uint8_t *)ha->request_ring,
+	    flt_addr << 2, OPTROM_BURST_SIZE);
+	if (*wptr == __constant_cpu_to_le16(0xffff))
+		goto no_flash_data;
+	if (flt->version != __constant_cpu_to_le16(1)) {
+		DEBUG2(qla_printk(KERN_INFO, ha, "Unsupported FLT detected: "
+		    "version=0x%x length=0x%x checksum=0x%x.\n",
+		    le16_to_cpu(flt->version), le16_to_cpu(flt->length),
+		    le16_to_cpu(flt->checksum)));
+		goto no_flash_data;
+	}
+
+	cnt = (sizeof(struct qla_flt_header) + le16_to_cpu(flt->length)) >> 1;
+	for (chksum = 0; cnt; cnt--)
+		chksum += le16_to_cpu(*wptr++);
+	if (chksum) {
+		DEBUG2(qla_printk(KERN_INFO, ha, "Inconsistent FLT detected: "
+		    "version=0x%x length=0x%x checksum=0x%x.\n",
+		    le16_to_cpu(flt->version), le16_to_cpu(flt->length),
+		    chksum));
+		goto no_flash_data;
+	}
+
+	loc = locations[1];
+	cnt = le16_to_cpu(flt->length) / sizeof(struct qla_flt_region);
+	for ( ; cnt; cnt--, region++) {
+		/* Store addresses as DWORD offsets. */
+		start = le32_to_cpu(region->start) >> 2;
+
+		DEBUG3(qla_printk(KERN_DEBUG, ha, "FLT[%02x]: start=0x%x "
+		    "end=0x%x size=0x%x.\n", le32_to_cpu(region->code), start,
+		    le32_to_cpu(region->end) >> 2, le32_to_cpu(region->size)));
+
+		switch (le32_to_cpu(region->code)) {
+		case FLT_REG_FW:
+			ha->flt_region_fw = start;
+			break;
+		case FLT_REG_BOOT_CODE:
+			ha->flt_region_boot = start;
+			break;
+		case FLT_REG_VPD_0:
+			ha->flt_region_vpd_nvram = start;
+			break;
+		case FLT_REG_FDT:
+			ha->flt_region_fdt = start;
+			break;
+		case FLT_REG_HW_EVENT_0:
+			if (!PCI_FUNC(ha->pdev->devfn))
+				ha->flt_region_hw_event = start;
+			break;
+		case FLT_REG_HW_EVENT_1:
+			if (PCI_FUNC(ha->pdev->devfn))
+				ha->flt_region_hw_event = start;
+			break;
+		case FLT_REG_NPIV_CONF_0:
+			if (!PCI_FUNC(ha->pdev->devfn))
+				ha->flt_region_npiv_conf = start;
+			break;
+		case FLT_REG_NPIV_CONF_1:
+			if (PCI_FUNC(ha->pdev->devfn))
+				ha->flt_region_npiv_conf = start;
+			break;
+		}
+	}
+	goto done;
+
+no_flash_data:
+	/* Use hardcoded defaults. */
+	loc = locations[0];
+	ha->flt_region_fw = FA_RISC_CODE_ADDR;
+	ha->flt_region_boot = FA_BOOT_CODE_ADDR;
+	ha->flt_region_vpd_nvram = FA_VPD_NVRAM_ADDR;
+	ha->flt_region_fdt = IS_QLA24XX_TYPE(ha) ? FA_FLASH_DESCR_ADDR_24:
+	    FA_FLASH_DESCR_ADDR;
+	ha->flt_region_hw_event = !PCI_FUNC(ha->pdev->devfn) ?
+	    FA_HW_EVENT0_ADDR: FA_HW_EVENT1_ADDR;
+	ha->flt_region_npiv_conf = !PCI_FUNC(ha->pdev->devfn) ?
+	    (IS_QLA24XX_TYPE(ha) ? FA_NPIV_CONF0_ADDR_24: FA_NPIV_CONF0_ADDR):
+	    (IS_QLA24XX_TYPE(ha) ? FA_NPIV_CONF1_ADDR_24: FA_NPIV_CONF1_ADDR);
+done:
+	DEBUG2(qla_printk(KERN_DEBUG, ha, "FLT[%s]: boot=0x%x fw=0x%x "
+	    "vpd_nvram=0x%x fdt=0x%x flt=0x%x hwe=0x%x npiv=0x%x.\n", loc,
+	    ha->flt_region_boot, ha->flt_region_fw, ha->flt_region_vpd_nvram,
+	    ha->flt_region_fdt, ha->flt_region_flt, ha->flt_region_hw_event,
+	    ha->flt_region_npiv_conf));
+}
+
+static void
+qla2xxx_get_fdt_info(scsi_qla_host_t *ha)
 {
 #define FLASH_BLK_SIZE_32K	0x8000
 #define FLASH_BLK_SIZE_64K	0x10000
+	const char *loc, *locations[] = { "MID", "FDT" };
 	uint16_t cnt, chksum;
 	uint16_t *wptr;
 	struct qla_fdt_layout *fdt;
 	uint8_t	man_id, flash_id;
-
-	if (!IS_QLA24XX_TYPE(ha) && !IS_QLA25XX(ha))
-		return;
+	uint16_t mid, fid;
 
 	wptr = (uint16_t *)ha->request_ring;
 	fdt = (struct qla_fdt_layout *)ha->request_ring;
 	ha->isp_ops->read_optrom(ha, (uint8_t *)ha->request_ring,
-	    FA_FLASH_DESCR_ADDR << 2, OPTROM_BURST_SIZE);
+	    ha->flt_region_fdt << 2, OPTROM_BURST_SIZE);
 	if (*wptr == __constant_cpu_to_le16(0xffff))
 		goto no_flash_data;
 	if (fdt->sig[0] != 'Q' || fdt->sig[1] != 'L' || fdt->sig[2] != 'I' ||
@@ -577,7 +752,10 @@
 		goto no_flash_data;
 	}
 
-	ha->fdt_odd_index = le16_to_cpu(fdt->man_id) == 0x1f;
+	loc = locations[1];
+	mid = le16_to_cpu(fdt->man_id);
+	fid = le16_to_cpu(fdt->id);
+	ha->fdt_odd_index = mid == 0x1f;
 	ha->fdt_wrt_disable = fdt->wrt_disable_bits;
 	ha->fdt_erase_cmd = flash_conf_to_access_addr(0x0300 | fdt->erase_cmd);
 	ha->fdt_block_size = le32_to_cpu(fdt->block_size);
@@ -588,16 +766,12 @@
 		    flash_conf_to_access_addr(0x0300 | fdt->protect_sec_cmd):
 		    flash_conf_to_access_addr(0x0336);
 	}
-
-	DEBUG2(qla_printk(KERN_DEBUG, ha, "Flash[FDT]: (0x%x/0x%x) erase=0x%x "
-	    "pro=%x upro=%x idx=%d wrtd=0x%x blk=0x%x.\n",
-	    le16_to_cpu(fdt->man_id), le16_to_cpu(fdt->id), ha->fdt_erase_cmd,
-	    ha->fdt_protect_sec_cmd, ha->fdt_unprotect_sec_cmd,
-	    ha->fdt_odd_index, ha->fdt_wrt_disable, ha->fdt_block_size));
-	return;
-
+	goto done;
 no_flash_data:
+	loc = locations[0];
 	qla24xx_get_flash_manufacturer(ha, &man_id, &flash_id);
+	mid = man_id;
+	fid = flash_id;
 	ha->fdt_wrt_disable = 0x9c;
 	ha->fdt_erase_cmd = flash_conf_to_access_addr(0x03d8);
 	switch (man_id) {
@@ -625,14 +799,117 @@
 		ha->fdt_block_size = FLASH_BLK_SIZE_64K;
 		break;
 	}
-
-	DEBUG2(qla_printk(KERN_DEBUG, ha, "Flash[MID]: (0x%x/0x%x) erase=0x%x "
-	    "pro=%x upro=%x idx=%d wrtd=0x%x blk=0x%x.\n", man_id, flash_id,
+done:
+	DEBUG2(qla_printk(KERN_DEBUG, ha, "FDT[%s]: (0x%x/0x%x) erase=0x%x "
+	    "pro=%x upro=%x idx=%d wrtd=0x%x blk=0x%x.\n", loc, mid, fid,
 	    ha->fdt_erase_cmd, ha->fdt_protect_sec_cmd,
 	    ha->fdt_unprotect_sec_cmd, ha->fdt_odd_index, ha->fdt_wrt_disable,
 	    ha->fdt_block_size));
 }
 
+int
+qla2xxx_get_flash_info(scsi_qla_host_t *ha)
+{
+	int ret;
+	uint32_t flt_addr;
+
+	if (!IS_QLA24XX_TYPE(ha) && !IS_QLA25XX(ha))
+		return QLA_SUCCESS;
+
+	ret = qla2xxx_find_flt_start(ha, &flt_addr);
+	if (ret != QLA_SUCCESS)
+		return ret;
+
+	qla2xxx_get_flt_info(ha, flt_addr);
+	qla2xxx_get_fdt_info(ha);
+
+	return QLA_SUCCESS;
+}
+
+void
+qla2xxx_flash_npiv_conf(scsi_qla_host_t *ha)
+{
+#define NPIV_CONFIG_SIZE	(16*1024)
+	void *data;
+	uint16_t *wptr;
+	uint16_t cnt, chksum;
+	struct qla_npiv_header hdr;
+	struct qla_npiv_entry *entry;
+
+	if (!IS_QLA24XX_TYPE(ha) && !IS_QLA25XX(ha))
+		return;
+
+	ha->isp_ops->read_optrom(ha, (uint8_t *)&hdr,
+	    ha->flt_region_npiv_conf << 2, sizeof(struct qla_npiv_header));
+	if (hdr.version == __constant_cpu_to_le16(0xffff))
+		return;
+	if (hdr.version != __constant_cpu_to_le16(1)) {
+		DEBUG2(qla_printk(KERN_INFO, ha, "Unsupported NPIV-Config "
+		    "detected: version=0x%x entries=0x%x checksum=0x%x.\n",
+		    le16_to_cpu(hdr.version), le16_to_cpu(hdr.entries),
+		    le16_to_cpu(hdr.checksum)));
+		return;
+	}
+
+	data = kmalloc(NPIV_CONFIG_SIZE, GFP_KERNEL);
+	if (!data) {
+		DEBUG2(qla_printk(KERN_INFO, ha, "NPIV-Config: Unable to "
+		    "allocate memory.\n"));
+		return;
+	}
+
+	ha->isp_ops->read_optrom(ha, (uint8_t *)data,
+	    ha->flt_region_npiv_conf << 2, NPIV_CONFIG_SIZE);
+
+	cnt = (sizeof(struct qla_npiv_header) + le16_to_cpu(hdr.entries) *
+	    sizeof(struct qla_npiv_entry)) >> 1;
+	for (wptr = data, chksum = 0; cnt; cnt--)
+		chksum += le16_to_cpu(*wptr++);
+	if (chksum) {
+		DEBUG2(qla_printk(KERN_INFO, ha, "Inconsistent NPIV-Config "
+		    "detected: version=0x%x entries=0x%x checksum=0x%x.\n",
+		    le16_to_cpu(hdr.version), le16_to_cpu(hdr.entries),
+		    chksum));
+		goto done;
+	}
+
+	entry = data + sizeof(struct qla_npiv_header);
+	cnt = le16_to_cpu(hdr.entries);
+	for ( ; cnt; cnt--, entry++) {
+		uint16_t flags;
+		struct fc_vport_identifiers vid;
+		struct fc_vport *vport;
+
+		flags = le16_to_cpu(entry->flags);
+		if (flags == 0xffff)
+			continue;
+		if ((flags & BIT_0) == 0)
+			continue;
+
+		memset(&vid, 0, sizeof(vid));
+		vid.roles = FC_PORT_ROLE_FCP_INITIATOR;
+		vid.vport_type = FC_PORTTYPE_NPIV;
+		vid.disable = false;
+		vid.port_name = wwn_to_u64(entry->port_name);
+		vid.node_name = wwn_to_u64(entry->node_name);
+
+		DEBUG2(qla_printk(KERN_DEBUG, ha, "NPIV[%02x]: wwpn=%llx "
+		    "wwnn=%llx vf_id=0x%x qos=0x%x.\n", cnt,
+		    (unsigned long long)vid.port_name,
+		    (unsigned long long)vid.node_name,
+		    le16_to_cpu(entry->vf_id), le16_to_cpu(entry->qos)));
+
+		vport = fc_vport_create(ha->host, 0, &vid);
+		if (!vport)
+			qla_printk(KERN_INFO, ha, "NPIV-Config: Failed to "
+			    "create vport [%02x]: wwpn=%llx wwnn=%llx.\n", cnt,
+			    (unsigned long long)vid.port_name,
+			    (unsigned long long)vid.node_name);
+	}
+done:
+	kfree(data);
+}
+
 static void
 qla24xx_unprotect_flash(scsi_qla_host_t *ha)
 {
@@ -920,7 +1197,8 @@
 	dwptr = (uint32_t *)buf;
 	for (i = 0; i < bytes >> 2; i++, naddr++)
 		dwptr[i] = cpu_to_le32(qla24xx_read_flash_dword(ha,
-		    flash_data_to_access_addr(FA_VPD_NVRAM_ADDR | naddr)));
+		    flash_data_to_access_addr(ha->flt_region_vpd_nvram |
+		    naddr)));
 
 	return buf;
 }
@@ -935,10 +1213,10 @@
 	dbuf = vmalloc(RMW_BUFFER_SIZE);
 	if (!dbuf)
 		return QLA_MEMORY_ALLOC_FAILED;
-	ha->isp_ops->read_optrom(ha, dbuf, FA_VPD_NVRAM_ADDR << 2,
+	ha->isp_ops->read_optrom(ha, dbuf, ha->flt_region_vpd_nvram << 2,
 	    RMW_BUFFER_SIZE);
 	memcpy(dbuf + (naddr << 2), buf, bytes);
-	ha->isp_ops->write_optrom(ha, dbuf, FA_VPD_NVRAM_ADDR << 2,
+	ha->isp_ops->write_optrom(ha, dbuf, ha->flt_region_vpd_nvram << 2,
 	    RMW_BUFFER_SIZE);
 	vfree(dbuf);
 
@@ -2166,7 +2444,7 @@
 		memset(dbyte, 0, 8);
 		dcode = (uint16_t *)dbyte;
 
-		qla2x00_read_flash_data(ha, dbyte, FA_RISC_CODE_ADDR * 4 + 10,
+		qla2x00_read_flash_data(ha, dbyte, ha->flt_region_fw * 4 + 10,
 		    8);
 		DEBUG3(printk("%s(%ld): dumping fw ver from flash:\n",
 		    __func__, ha->host_no));
@@ -2177,7 +2455,7 @@
 		    (dcode[0] == 0 && dcode[1] == 0 && dcode[2] == 0 &&
 		    dcode[3] == 0)) {
 			DEBUG2(printk("%s(): Unrecognized fw revision at "
-			    "%x.\n", __func__, FA_RISC_CODE_ADDR * 4));
+			    "%x.\n", __func__, ha->flt_region_fw * 4));
 		} else {
 			/* values are in big endian */
 			ha->fw_revision[0] = dbyte[0] << 16 | dbyte[1];
@@ -2212,7 +2490,7 @@
 	dcode = mbuf;
 
 	/* Begin with first PCI expansion ROM header. */
-	pcihdr = 0;
+	pcihdr = ha->flt_region_boot;
 	last_image = 1;
 	do {
 		/* Verify PCI expansion ROM header. */
@@ -2282,7 +2560,7 @@
 	memset(ha->fw_revision, 0, sizeof(ha->fw_revision));
 	dcode = mbuf;
 
-	qla24xx_read_flash_data(ha, dcode, FA_RISC_CODE_ADDR + 4, 4);
+	qla24xx_read_flash_data(ha, dcode, ha->flt_region_fw + 4, 4);
 	for (i = 0; i < 4; i++)
 		dcode[i] = be32_to_cpu(dcode[i]);
 
@@ -2291,7 +2569,7 @@
 	    (dcode[0] == 0 && dcode[1] == 0 && dcode[2] == 0 &&
 	    dcode[3] == 0)) {
 		DEBUG2(printk("%s(): Unrecognized fw version at %x.\n",
-		    __func__, FA_RISC_CODE_ADDR));
+		    __func__, ha->flt_region_fw));
 	} else {
 		ha->fw_revision[0] = dcode[0];
 		ha->fw_revision[1] = dcode[1];
@@ -2355,7 +2633,7 @@
 	/* Locate first empty entry. */
 	for (;;) {
 		if (ha->hw_event_ptr >=
-		    ha->hw_event_start + FA_HW_EVENT_SIZE) {
+		    ha->flt_region_hw_event + FA_HW_EVENT_SIZE) {
 			DEBUG2(qla_printk(KERN_WARNING, ha,
 			    "HW event -- Log Full!\n"));
 			return QLA_MEMORY_ALLOC_FAILED;
@@ -2391,7 +2669,7 @@
 	int rval;
 	uint32_t marker[2], fdata[4];
 
-	if (ha->hw_event_start == 0)
+	if (ha->flt_region_hw_event == 0)
 		return QLA_FUNCTION_FAILED;
 
 	DEBUG2(qla_printk(KERN_WARNING, ha,
@@ -2406,7 +2684,7 @@
 		    QLA_DRIVER_PATCH_VER, QLA_DRIVER_BETA_VER);
 
 		/* Locate marker. */
-		ha->hw_event_ptr = ha->hw_event_start;
+		ha->hw_event_ptr = ha->flt_region_hw_event;
 		for (;;) {
 			qla24xx_read_flash_data(ha, fdata, ha->hw_event_ptr,
 			    4);
@@ -2415,7 +2693,7 @@
 				break;
 			ha->hw_event_ptr += FA_HW_EVENT_ENTRY_SIZE;
 			if (ha->hw_event_ptr >=
-			    ha->hw_event_start + FA_HW_EVENT_SIZE) {
+			    ha->flt_region_hw_event + FA_HW_EVENT_SIZE) {
 				DEBUG2(qla_printk(KERN_WARNING, ha,
 				    "HW event -- Log Full!\n"));
 				return QLA_MEMORY_ALLOC_FAILED;
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 4160e4c..be5e299 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.02.01-k7"
+#define QLA2XXX_VERSION      "8.02.01-k8"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	2
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 88bebb1..de8279a 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -1542,7 +1542,7 @@
 	DEBUG2(printk(KERN_INFO
 		      "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x,"
 		      "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no,
-		      cmd, jiffies, cmd->timeout_per_command / HZ,
+		      cmd, jiffies, cmd->request->timeout / HZ,
 		      ha->dpc_flags, cmd->result, cmd->allowed));
 
 	/* FIXME: wait for hba to go online */
@@ -1598,7 +1598,7 @@
 	DEBUG2(printk(KERN_INFO
 		      "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, "
 		      "to=%x,dpc_flags=%lx, status=%x allowed=%d\n",
-		      ha->host_no, cmd, jiffies, cmd->timeout_per_command / HZ,
+		      ha->host_no, cmd, jiffies, cmd->request->timeout / HZ,
 		      ha->dpc_flags, cmd->result, cmd->allowed));
 
 	stat = qla4xxx_reset_target(ha, ddb_entry);
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index ee6be59..2ac3cb2 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -291,7 +291,6 @@
 		unsigned long flags;
 
 		cmd->device = dev;
-		init_timer(&cmd->eh_timeout);
 		INIT_LIST_HEAD(&cmd->list);
 		spin_lock_irqsave(&dev->list_lock, flags);
 		list_add_tail(&cmd->list, &dev->cmd_list);
@@ -652,26 +651,33 @@
 	unsigned long timeout;
 	int rtn = 0;
 
+	/*
+	 * We will use a queued command if possible, otherwise we will
+	 * emulate the queuing and calling of completion function ourselves.
+	 */
+	atomic_inc(&cmd->device->iorequest_cnt);
+
 	/* check if the device is still usable */
 	if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
 		/* in SDEV_DEL we error all commands. DID_NO_CONNECT
 		 * returns an immediate error upwards, and signals
 		 * that the device is no longer present */
 		cmd->result = DID_NO_CONNECT << 16;
-		atomic_inc(&cmd->device->iorequest_cnt);
-		__scsi_done(cmd);
+		scsi_done(cmd);
 		/* return 0 (because the command has been processed) */
 		goto out;
 	}
 
-	/* Check to see if the scsi lld put this device into state SDEV_BLOCK. */
-	if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) {
+	/* Check to see if the scsi lld made this device blocked. */
+	if (unlikely(scsi_device_blocked(cmd->device))) {
 		/* 
-		 * in SDEV_BLOCK, the command is just put back on the device
-		 * queue.  The suspend state has already blocked the queue so
-		 * future requests should not occur until the device 
-		 * transitions out of the suspend state.
+		 * in blocked state, the command is just put back on
+		 * the device queue.  The suspend state has already
+		 * blocked the queue so future requests should not
+		 * occur until the device transitions out of the
+		 * suspend state.
 		 */
+
 		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
 
 		SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));
@@ -714,21 +720,9 @@
 		host->resetting = 0;
 	}
 
-	/* 
-	 * AK: unlikely race here: for some reason the timer could
-	 * expire before the serial number is set up below.
-	 */
-	scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);
-
 	scsi_log_send(cmd);
 
 	/*
-	 * We will use a queued command if possible, otherwise we will
-	 * emulate the queuing and calling of completion function ourselves.
-	 */
-	atomic_inc(&cmd->device->iorequest_cnt);
-
-	/*
 	 * Before we queue this command, check if the command
 	 * length exceeds what the host adapter can handle.
 	 */
@@ -744,6 +738,12 @@
 	}
 
 	spin_lock_irqsave(host->host_lock, flags);
+	/*
+	 * AK: unlikely race here: for some reason the timer could
+	 * expire before the serial number is set up below.
+	 *
+	 * TODO: kill serial or move to blk layer
+	 */
 	scsi_cmd_get_serial(host, cmd); 
 
 	if (unlikely(host->shost_state == SHOST_DEL)) {
@@ -754,12 +754,8 @@
 	}
 	spin_unlock_irqrestore(host->host_lock, flags);
 	if (rtn) {
-		if (scsi_delete_timer(cmd)) {
-			atomic_inc(&cmd->device->iodone_cnt);
-			scsi_queue_insert(cmd,
-					  (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
-					  rtn : SCSI_MLQUEUE_HOST_BUSY);
-		}
+		scsi_queue_insert(cmd, (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
+						rtn : SCSI_MLQUEUE_HOST_BUSY);
 		SCSI_LOG_MLQUEUE(3,
 		    printk("queuecommand : request rejected\n"));
 	}
@@ -770,24 +766,6 @@
 }
 
 /**
- * scsi_req_abort_cmd -- Request command recovery for the specified command
- * @cmd: pointer to the SCSI command of interest
- *
- * This function requests that SCSI Core start recovery for the
- * command by deleting the timer and adding the command to the eh
- * queue.  It can be called by either LLDDs or SCSI Core.  LLDDs who
- * implement their own error recovery MAY ignore the timeout event if
- * they generated scsi_req_abort_cmd.
- */
-void scsi_req_abort_cmd(struct scsi_cmnd *cmd)
-{
-	if (!scsi_delete_timer(cmd))
-		return;
-	scsi_times_out(cmd);
-}
-EXPORT_SYMBOL(scsi_req_abort_cmd);
-
-/**
  * scsi_done - Enqueue the finished SCSI command into the done queue.
  * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
  * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
@@ -802,42 +780,7 @@
  */
 static void scsi_done(struct scsi_cmnd *cmd)
 {
-	/*
-	 * We don't have to worry about this one timing out anymore.
-	 * If we are unable to remove the timer, then the command
-	 * has already timed out.  In which case, we have no choice but to
-	 * let the timeout function run, as we have no idea where in fact
-	 * that function could really be.  It might be on another processor,
-	 * etc, etc.
-	 */
-	if (!scsi_delete_timer(cmd))
-		return;
-	__scsi_done(cmd);
-}
-
-/* Private entry to scsi_done() to complete a command when the timer
- * isn't running --- used by scsi_times_out */
-void __scsi_done(struct scsi_cmnd *cmd)
-{
-	struct request *rq = cmd->request;
-
-	/*
-	 * Set the serial numbers back to zero
-	 */
-	cmd->serial_number = 0;
-
-	atomic_inc(&cmd->device->iodone_cnt);
-	if (cmd->result)
-		atomic_inc(&cmd->device->ioerr_cnt);
-
-	BUG_ON(!rq);
-
-	/*
-	 * The uptodate/nbytes values don't matter, as we allow partial
-	 * completes and thus will check this in the softirq callback
-	 */
-	rq->completion_data = cmd;
-	blk_complete_request(rq);
+	blk_complete_request(cmd->request);
 }
 
 /* Move this to a header if it becomes more generally useful */
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 39ce3ab..fecefa0 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -112,69 +112,8 @@
 }
 
 /**
- * scsi_add_timer - Start timeout timer for a single scsi command.
- * @scmd:	scsi command that is about to start running.
- * @timeout:	amount of time to allow this command to run.
- * @complete:	timeout function to call if timer isn't canceled.
- *
- * Notes:
- *    This should be turned into an inline function.  Each scsi command
- *    has its own timer, and as it is added to the queue, we set up the
- *    timer.  When the command completes, we cancel the timer.
- */
-void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
-		    void (*complete)(struct scsi_cmnd *))
-{
-
-	/*
-	 * If the clock was already running for this command, then
-	 * first delete the timer.  The timer handling code gets rather
-	 * confused if we don't do this.
-	 */
-	if (scmd->eh_timeout.function)
-		del_timer(&scmd->eh_timeout);
-
-	scmd->eh_timeout.data = (unsigned long)scmd;
-	scmd->eh_timeout.expires = jiffies + timeout;
-	scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
-
-	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
-					  " %d, (%p)\n", __func__,
-					  scmd, timeout, complete));
-
-	add_timer(&scmd->eh_timeout);
-}
-
-/**
- * scsi_delete_timer - Delete/cancel timer for a given function.
- * @scmd:	Cmd that we are canceling timer for
- *
- * Notes:
- *     This should be turned into an inline function.
- *
- * Return value:
- *     1 if we were able to detach the timer.  0 if we blew it, and the
- *     timer function has already started to run.
- */
-int scsi_delete_timer(struct scsi_cmnd *scmd)
-{
-	int rtn;
-
-	rtn = del_timer(&scmd->eh_timeout);
-
-	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
-					 " rtn: %d\n", __func__,
-					 scmd, rtn));
-
-	scmd->eh_timeout.data = (unsigned long)NULL;
-	scmd->eh_timeout.function = NULL;
-
-	return rtn;
-}
-
-/**
  * scsi_times_out - Timeout function for normal scsi commands.
- * @scmd:	Cmd that is timing out.
+ * @req:	request that is timing out.
  *
  * Notes:
  *     We do not need to lock this.  There is the potential for a race
@@ -182,9 +121,11 @@
  *     normal completion function determines that the timer has already
  *     fired, then it mustn't do anything.
  */
-void scsi_times_out(struct scsi_cmnd *scmd)
+enum blk_eh_timer_return scsi_times_out(struct request *req)
 {
-	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+	struct scsi_cmnd *scmd = req->special;
+	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
+	enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
 
 	scsi_log_completion(scmd, TIMEOUT_ERROR);
 
@@ -196,22 +137,20 @@
 		eh_timed_out = NULL;
 
 	if (eh_timed_out)
-		switch (eh_timed_out(scmd)) {
-		case EH_HANDLED:
-			__scsi_done(scmd);
-			return;
-		case EH_RESET_TIMER:
-			scsi_add_timer(scmd, scmd->timeout_per_command,
-				       scsi_times_out);
-			return;
-		case EH_NOT_HANDLED:
+		rtn = eh_timed_out(scmd);
+		switch (rtn) {
+		case BLK_EH_NOT_HANDLED:
 			break;
+		default:
+			return rtn;
 		}
 
 	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
 		scmd->result |= DID_TIME_OUT << 16;
-		__scsi_done(scmd);
+		return BLK_EH_HANDLED;
 	}
+
+	return BLK_EH_NOT_HANDLED;
 }
 
 /**
@@ -1793,7 +1732,6 @@
 
 	blk_rq_init(NULL, &req);
 	scmd->request = &req;
-	memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
 
 	scmd->cmnd = req.cmd;
 
@@ -1804,8 +1742,6 @@
 
 	scmd->sc_data_direction		= DMA_BIDIRECTIONAL;
 
-	init_timer(&scmd->eh_timeout);
-
 	spin_lock_irqsave(shost->host_lock, flags);
 	shost->tmf_in_progress = 1;
 	spin_unlock_irqrestore(shost->host_lock, flags);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 62307bd..98ee55c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1181,7 +1181,6 @@
 	
 	cmd->transfersize = req->data_len;
 	cmd->allowed = req->retries;
-	cmd->timeout_per_command = req->timeout;
 	return BLKPREP_OK;
 }
 EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
@@ -1251,6 +1250,7 @@
 			break;
 		case SDEV_QUIESCE:
 		case SDEV_BLOCK:
+		case SDEV_CREATED_BLOCK:
 			/*
 			 * If the devices is blocked we defer normal commands.
 			 */
@@ -1416,17 +1416,26 @@
 	spin_unlock(shost->host_lock);
 	spin_lock(sdev->request_queue->queue_lock);
 
-	__scsi_done(cmd);
+	blk_complete_request(req);
 }
 
 static void scsi_softirq_done(struct request *rq)
 {
-	struct scsi_cmnd *cmd = rq->completion_data;
-	unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command;
+	struct scsi_cmnd *cmd = rq->special;
+	unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
 	int disposition;
 
 	INIT_LIST_HEAD(&cmd->eh_entry);
 
+	/*
+	 * Set the serial numbers back to zero
+	 */
+	cmd->serial_number = 0;
+
+	atomic_inc(&cmd->device->iodone_cnt);
+	if (cmd->result)
+		atomic_inc(&cmd->device->ioerr_cnt);
+
 	disposition = scsi_decide_disposition(cmd);
 	if (disposition != SUCCESS &&
 	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
@@ -1675,6 +1684,7 @@
 
 	blk_queue_prep_rq(q, scsi_prep_fn);
 	blk_queue_softirq_done(q, scsi_softirq_done);
+	blk_queue_rq_timed_out(q, scsi_times_out);
 	return q;
 }
 
@@ -2064,10 +2074,13 @@
 
 	switch (state) {
 	case SDEV_CREATED:
-		/* There are no legal states that come back to
-		 * created.  This is the manually initialised start
-		 * state */
-		goto illegal;
+		switch (oldstate) {
+		case SDEV_CREATED_BLOCK:
+			break;
+		default:
+			goto illegal;
+		}
+		break;
 			
 	case SDEV_RUNNING:
 		switch (oldstate) {
@@ -2105,8 +2118,17 @@
 
 	case SDEV_BLOCK:
 		switch (oldstate) {
-		case SDEV_CREATED:
 		case SDEV_RUNNING:
+		case SDEV_CREATED_BLOCK:
+			break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	case SDEV_CREATED_BLOCK:
+		switch (oldstate) {
+		case SDEV_CREATED:
 			break;
 		default:
 			goto illegal;
@@ -2394,8 +2416,12 @@
 	int err = 0;
 
 	err = scsi_device_set_state(sdev, SDEV_BLOCK);
-	if (err)
-		return err;
+	if (err) {
+		err = scsi_device_set_state(sdev, SDEV_CREATED_BLOCK);
+
+		if (err)
+			return err;
+	}
 
 	/* 
 	 * The device has transitioned to SDEV_BLOCK.  Stop the
@@ -2438,8 +2464,12 @@
 	 * and goose the device queue if successful.  
 	 */
 	err = scsi_device_set_state(sdev, SDEV_RUNNING);
-	if (err)
-		return err;
+	if (err) {
+		err = scsi_device_set_state(sdev, SDEV_CREATED);
+
+		if (err)
+			return err;
+	}
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_start_queue(q);
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index ae7ed9a..b37e133 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -21,6 +21,7 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 #include <linux/security.h>
+#include <linux/delay.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 
@@ -30,6 +31,39 @@
 struct sock *scsi_nl_sock = NULL;
 EXPORT_SYMBOL_GPL(scsi_nl_sock);
 
+static DEFINE_SPINLOCK(scsi_nl_lock);
+static struct list_head scsi_nl_drivers;
+
+static u32	scsi_nl_state;
+#define STATE_EHANDLER_BSY		0x00000001
+
+struct scsi_nl_transport {
+	int (*msg_handler)(struct sk_buff *);
+	void (*event_handler)(struct notifier_block *, unsigned long, void *);
+	unsigned int refcnt;
+	int flags;
+};
+
+/* flags values (bit flags) */
+#define HANDLER_DELETING		0x1
+
+static struct scsi_nl_transport transports[SCSI_NL_MAX_TRANSPORTS] =
+	{ {NULL, }, };
+
+
+struct scsi_nl_drvr {
+	struct list_head next;
+	int (*dmsg_handler)(struct Scsi_Host *shost, void *payload,
+				 u32 len, u32 pid);
+	void (*devt_handler)(struct notifier_block *nb,
+				 unsigned long event, void *notify_ptr);
+	struct scsi_host_template *hostt;
+	u64 vendor_id;
+	unsigned int refcnt;
+	int flags;
+};
+
+
 
 /**
  * scsi_nl_rcv_msg - Receive message handler.
@@ -45,8 +79,9 @@
 {
 	struct nlmsghdr *nlh;
 	struct scsi_nl_hdr *hdr;
-	uint32_t rlen;
-	int err;
+	unsigned long flags;
+	u32 rlen;
+	int err, tport;
 
 	while (skb->len >= NLMSG_SPACE(0)) {
 		err = 0;
@@ -65,7 +100,7 @@
 
 		if (nlh->nlmsg_type != SCSI_TRANSPORT_MSG) {
 			err = -EBADMSG;
-			return;
+			goto next_msg;
 		}
 
 		hdr = NLMSG_DATA(nlh);
@@ -83,12 +118,27 @@
 		if (nlh->nlmsg_len < (sizeof(*nlh) + hdr->msglen)) {
 			printk(KERN_WARNING "%s: discarding partial message\n",
 				 __func__);
-			return;
+			goto next_msg;
 		}
 
 		/*
-		 * We currently don't support anyone sending us a message
+		 * Deliver message to the appropriate transport
 		 */
+		spin_lock_irqsave(&scsi_nl_lock, flags);
+
+		tport = hdr->transport;
+		if ((tport < SCSI_NL_MAX_TRANSPORTS) &&
+		    !(transports[tport].flags & HANDLER_DELETING) &&
+		    (transports[tport].msg_handler)) {
+			transports[tport].refcnt++;
+			spin_unlock_irqrestore(&scsi_nl_lock, flags);
+			err = transports[tport].msg_handler(skb);
+			spin_lock_irqsave(&scsi_nl_lock, flags);
+			transports[tport].refcnt--;
+		} else
+			err = -ENOENT;
+
+		spin_unlock_irqrestore(&scsi_nl_lock, flags);
 
 next_msg:
 		if ((err) || (nlh->nlmsg_flags & NLM_F_ACK))
@@ -110,14 +160,42 @@
 scsi_nl_rcv_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct netlink_notify *n = ptr;
+	struct scsi_nl_drvr *driver;
+	unsigned long flags;
+	int tport;
 
 	if (n->protocol != NETLINK_SCSITRANSPORT)
 		return NOTIFY_DONE;
 
+	spin_lock_irqsave(&scsi_nl_lock, flags);
+	scsi_nl_state |= STATE_EHANDLER_BSY;
+
 	/*
-	 * Currently, we are not tracking PID's, etc. There is nothing
-	 * to handle.
+	 * Pass event on to any transports that may be listening
 	 */
+	for (tport = 0; tport < SCSI_NL_MAX_TRANSPORTS; tport++) {
+		if (!(transports[tport].flags & HANDLER_DELETING) &&
+		    (transports[tport].event_handler)) {
+			spin_unlock_irqrestore(&scsi_nl_lock, flags);
+			transports[tport].event_handler(this, event, ptr);
+			spin_lock_irqsave(&scsi_nl_lock, flags);
+		}
+	}
+
+	/*
+	 * Pass event on to any drivers that may be listening
+	 */
+	list_for_each_entry(driver, &scsi_nl_drivers, next) {
+		if (!(driver->flags & HANDLER_DELETING) &&
+		    (driver->devt_handler)) {
+			spin_unlock_irqrestore(&scsi_nl_lock, flags);
+			driver->devt_handler(this, event, ptr);
+			spin_lock_irqsave(&scsi_nl_lock, flags);
+		}
+	}
+
+	scsi_nl_state &= ~STATE_EHANDLER_BSY;
+	spin_unlock_irqrestore(&scsi_nl_lock, flags);
 
 	return NOTIFY_DONE;
 }
@@ -128,7 +206,281 @@
 
 
 /**
- * scsi_netlink_init - Called by SCSI subsystem to intialize the SCSI transport netlink interface
+ * GENERIC SCSI transport receive and event handlers
+ **/
+
+/**
+ * scsi_generic_msg_handler - receive message handler for GENERIC transport
+ * 			 messages
+ *
+ * @skb:		socket receive buffer
+ *
+ **/
+static int
+scsi_generic_msg_handler(struct sk_buff *skb)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
+	struct scsi_nl_hdr *snlh = NLMSG_DATA(nlh);
+	struct scsi_nl_drvr *driver;
+	struct Scsi_Host *shost;
+	unsigned long flags;
+	int err = 0, match, pid;
+
+	pid = NETLINK_CREDS(skb)->pid;
+
+	switch (snlh->msgtype) {
+	case SCSI_NL_SHOST_VENDOR:
+		{
+		struct scsi_nl_host_vendor_msg *msg = NLMSG_DATA(nlh);
+
+		/* Locate the driver that corresponds to the message */
+		spin_lock_irqsave(&scsi_nl_lock, flags);
+		match = 0;
+		list_for_each_entry(driver, &scsi_nl_drivers, next) {
+			if (driver->vendor_id == msg->vendor_id) {
+				match = 1;
+				break;
+			}
+		}
+
+		if ((!match) || (!driver->dmsg_handler)) {
+			spin_unlock_irqrestore(&scsi_nl_lock, flags);
+			err = -ESRCH;
+			goto rcv_exit;
+		}
+
+		if (driver->flags & HANDLER_DELETING) {
+			spin_unlock_irqrestore(&scsi_nl_lock, flags);
+			err = -ESHUTDOWN;
+			goto rcv_exit;
+		}
+
+		driver->refcnt++;
+		spin_unlock_irqrestore(&scsi_nl_lock, flags);
+
+
+		/* if successful, scsi_host_lookup takes a shost reference */
+		shost = scsi_host_lookup(msg->host_no);
+		if (!shost) {
+			err = -ENODEV;
+			goto driver_exit;
+		}
+
+		/* is this host owned by the vendor ? */
+		if (shost->hostt != driver->hostt) {
+			err = -EINVAL;
+			goto vendormsg_put;
+		}
+
+		/* pass message on to the driver */
+		err = driver->dmsg_handler(shost, (void *)&msg[1],
+					 msg->vmsg_datalen, pid);
+
+vendormsg_put:
+		/* release reference by scsi_host_lookup */
+		scsi_host_put(shost);
+
+driver_exit:
+		/* release our own reference on the registration object */
+		spin_lock_irqsave(&scsi_nl_lock, flags);
+		driver->refcnt--;
+		spin_unlock_irqrestore(&scsi_nl_lock, flags);
+		break;
+		}
+
+	default:
+		err = -EBADR;
+		break;
+	}
+
+rcv_exit:
+	if (err)
+		printk(KERN_WARNING "%s: Msgtype %d failed - err %d\n",
+			 __func__, snlh->msgtype, err);
+	return err;
+}
+
+
+/**
+ * scsi_nl_add_transport -
+ *    Registers message and event handlers for a transport. Enables
+ *    receipt of netlink messages and events to a transport.
+ *
+ * @tport:		transport registering handlers
+ * @msg_handler:	receive message handler callback
+ * @event_handler:	receive event handler callback
+ **/
+int
+scsi_nl_add_transport(u8 tport,
+	int (*msg_handler)(struct sk_buff *),
+	void (*event_handler)(struct notifier_block *, unsigned long, void *))
+{
+	unsigned long flags;
+	int err = 0;
+
+	if (tport >= SCSI_NL_MAX_TRANSPORTS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&scsi_nl_lock, flags);
+
+	if (scsi_nl_state & STATE_EHANDLER_BSY) {
+		spin_unlock_irqrestore(&scsi_nl_lock, flags);
+		msleep(1);
+		spin_lock_irqsave(&scsi_nl_lock, flags);
+	}
+
+	if (transports[tport].msg_handler || transports[tport].event_handler) {
+		err = -EALREADY;
+		goto register_out;
+	}
+
+	transports[tport].msg_handler = msg_handler;
+	transports[tport].event_handler = event_handler;
+	transports[tport].flags = 0;
+	transports[tport].refcnt = 0;
+
+register_out:
+	spin_unlock_irqrestore(&scsi_nl_lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(scsi_nl_add_transport);
+
+
+/**
+ * scsi_nl_remove_transport -
+ *    Disable transport receiption of messages and events
+ *
+ * @tport:		transport deregistering handlers
+ *
+ **/
+void
+scsi_nl_remove_transport(u8 tport)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&scsi_nl_lock, flags);
+	if (scsi_nl_state & STATE_EHANDLER_BSY) {
+		spin_unlock_irqrestore(&scsi_nl_lock, flags);
+		msleep(1);
+		spin_lock_irqsave(&scsi_nl_lock, flags);
+	}
+
+	if (tport < SCSI_NL_MAX_TRANSPORTS) {
+		transports[tport].flags |= HANDLER_DELETING;
+
+		while (transports[tport].refcnt != 0) {
+			spin_unlock_irqrestore(&scsi_nl_lock, flags);
+			schedule_timeout_uninterruptible(HZ/4);
+			spin_lock_irqsave(&scsi_nl_lock, flags);
+		}
+		transports[tport].msg_handler = NULL;
+		transports[tport].event_handler = NULL;
+		transports[tport].flags = 0;
+	}
+
+	spin_unlock_irqrestore(&scsi_nl_lock, flags);
+
+	return;
+}
+EXPORT_SYMBOL_GPL(scsi_nl_remove_transport);
+
+
+/**
+ * scsi_nl_add_driver -
+ *    A driver is registering its interfaces for SCSI netlink messages
+ *
+ * @vendor_id:          A unique identification value for the driver.
+ * @hostt:		address of the driver's host template. Used
+ *			to verify an shost is bound to the driver
+ * @nlmsg_handler:	receive message handler callback
+ * @nlevt_handler:	receive event handler callback
+ *
+ * Returns:
+ *   0 on Success
+ *   error result otherwise
+ **/
+int
+scsi_nl_add_driver(u64 vendor_id, struct scsi_host_template *hostt,
+	int (*nlmsg_handler)(struct Scsi_Host *shost, void *payload,
+				 u32 len, u32 pid),
+	void (*nlevt_handler)(struct notifier_block *nb,
+				 unsigned long event, void *notify_ptr))
+{
+	struct scsi_nl_drvr *driver;
+	unsigned long flags;
+
+	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
+	if (unlikely(!driver)) {
+		printk(KERN_ERR "%s: allocation failure\n", __func__);
+		return -ENOMEM;
+	}
+
+	driver->dmsg_handler = nlmsg_handler;
+	driver->devt_handler = nlevt_handler;
+	driver->hostt = hostt;
+	driver->vendor_id = vendor_id;
+
+	spin_lock_irqsave(&scsi_nl_lock, flags);
+	if (scsi_nl_state & STATE_EHANDLER_BSY) {
+		spin_unlock_irqrestore(&scsi_nl_lock, flags);
+		msleep(1);
+		spin_lock_irqsave(&scsi_nl_lock, flags);
+	}
+	list_add_tail(&driver->next, &scsi_nl_drivers);
+	spin_unlock_irqrestore(&scsi_nl_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scsi_nl_add_driver);
+
+
+/**
+ * scsi_nl_remove_driver -
+ *    An driver is unregistering with the SCSI netlink messages
+ *
+ * @vendor_id:          The unique identification value for the driver.
+ **/
+void
+scsi_nl_remove_driver(u64 vendor_id)
+{
+	struct scsi_nl_drvr *driver;
+	unsigned long flags;
+
+	spin_lock_irqsave(&scsi_nl_lock, flags);
+	if (scsi_nl_state & STATE_EHANDLER_BSY) {
+		spin_unlock_irqrestore(&scsi_nl_lock, flags);
+		msleep(1);
+		spin_lock_irqsave(&scsi_nl_lock, flags);
+	}
+
+	list_for_each_entry(driver, &scsi_nl_drivers, next) {
+		if (driver->vendor_id == vendor_id) {
+			driver->flags |= HANDLER_DELETING;
+			while (driver->refcnt != 0) {
+				spin_unlock_irqrestore(&scsi_nl_lock, flags);
+				schedule_timeout_uninterruptible(HZ/4);
+				spin_lock_irqsave(&scsi_nl_lock, flags);
+			}
+			list_del(&driver->next);
+			kfree(driver);
+			spin_unlock_irqrestore(&scsi_nl_lock, flags);
+			return;
+		}
+	}
+
+	spin_unlock_irqrestore(&scsi_nl_lock, flags);
+
+	printk(KERN_ERR "%s: removal of driver failed - vendor_id 0x%llx\n",
+	       __func__, (unsigned long long)vendor_id);
+	return;
+}
+EXPORT_SYMBOL_GPL(scsi_nl_remove_driver);
+
+
+/**
+ * scsi_netlink_init - Called by SCSI subsystem to intialize
+ * 	the SCSI transport netlink interface
  *
  **/
 void
@@ -136,6 +488,8 @@
 {
 	int error;
 
+	INIT_LIST_HEAD(&scsi_nl_drivers);
+
 	error = netlink_register_notifier(&scsi_netlink_notifier);
 	if (error) {
 		printk(KERN_ERR "%s: register of event handler failed - %d\n",
@@ -150,8 +504,15 @@
 		printk(KERN_ERR "%s: register of recieve handler failed\n",
 				__func__);
 		netlink_unregister_notifier(&scsi_netlink_notifier);
+		return;
 	}
 
+	/* Register the entry points for the generic SCSI transport */
+	error = scsi_nl_add_transport(SCSI_NL_TRANSPORT,
+				scsi_generic_msg_handler, NULL);
+	if (error)
+		printk(KERN_ERR "%s: register of GENERIC transport handler"
+				"  failed - %d\n", __func__, error);
 	return;
 }
 
@@ -163,6 +524,8 @@
 void
 scsi_netlink_exit(void)
 {
+	scsi_nl_remove_transport(SCSI_NL_TRANSPORT);
+
 	if (scsi_nl_sock) {
 		netlink_kernel_release(scsi_nl_sock);
 		netlink_unregister_notifier(&scsi_netlink_notifier);
@@ -172,3 +535,147 @@
 }
 
 
+/*
+ * Exported Interfaces
+ */
+
+/**
+ * scsi_nl_send_transport_msg -
+ *    Generic function to send a single message from a SCSI transport to
+ *    a single process
+ *
+ * @pid:		receiving pid
+ * @hdr:		message payload
+ *
+ **/
+void
+scsi_nl_send_transport_msg(u32 pid, struct scsi_nl_hdr *hdr)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr	*nlh;
+	const char *fn;
+	char *datab;
+	u32 len, skblen;
+	int err;
+
+	if (!scsi_nl_sock) {
+		err = -ENOENT;
+		fn = "netlink socket";
+		goto msg_fail;
+	}
+
+	len = NLMSG_SPACE(hdr->msglen);
+	skblen = NLMSG_SPACE(len);
+
+	skb = alloc_skb(skblen, GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		fn = "alloc_skb";
+		goto msg_fail;
+	}
+
+	nlh = nlmsg_put(skb, pid, 0, SCSI_TRANSPORT_MSG, len - sizeof(*nlh), 0);
+	if (!nlh) {
+		err = -ENOBUFS;
+		fn = "nlmsg_put";
+		goto msg_fail_skb;
+	}
+	datab = NLMSG_DATA(nlh);
+	memcpy(datab, hdr, hdr->msglen);
+
+	err = nlmsg_unicast(scsi_nl_sock, skb, pid);
+	if (err < 0) {
+		fn = "nlmsg_unicast";
+		/* nlmsg_unicast already kfree_skb'd */
+		goto msg_fail;
+	}
+
+	return;
+
+msg_fail_skb:
+	kfree_skb(skb);
+msg_fail:
+	printk(KERN_WARNING
+		"%s: Dropped Message : pid %d Transport %d, msgtype x%x, "
+		"msglen %d: %s : err %d\n",
+		__func__, pid, hdr->transport, hdr->msgtype, hdr->msglen,
+		fn, err);
+	return;
+}
+EXPORT_SYMBOL_GPL(scsi_nl_send_transport_msg);
+
+
+/**
+ * scsi_nl_send_vendor_msg - called to send a shost vendor unique message
+ *                      to a specific process id.
+ *
+ * @pid:		process id of the receiver
+ * @host_no:		host # sending the message
+ * @vendor_id:		unique identifier for the driver's vendor
+ * @data_len:		amount, in bytes, of vendor unique payload data
+ * @data_buf:		pointer to vendor unique data buffer
+ *
+ * Returns:
+ *   0 on succesful return
+ *   otherwise, failing error code
+ *
+ * Notes:
+ *	This routine assumes no locks are held on entry.
+ */
+int
+scsi_nl_send_vendor_msg(u32 pid, unsigned short host_no, u64 vendor_id,
+			 char *data_buf, u32 data_len)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr	*nlh;
+	struct scsi_nl_host_vendor_msg *msg;
+	u32 len, skblen;
+	int err;
+
+	if (!scsi_nl_sock) {
+		err = -ENOENT;
+		goto send_vendor_fail;
+	}
+
+	len = SCSI_NL_MSGALIGN(sizeof(*msg) + data_len);
+	skblen = NLMSG_SPACE(len);
+
+	skb = alloc_skb(skblen, GFP_KERNEL);
+	if (!skb) {
+		err = -ENOBUFS;
+		goto send_vendor_fail;
+	}
+
+	nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG,
+				skblen - sizeof(*nlh), 0);
+	if (!nlh) {
+		err = -ENOBUFS;
+		goto send_vendor_fail_skb;
+	}
+	msg = NLMSG_DATA(nlh);
+
+	INIT_SCSI_NL_HDR(&msg->snlh, SCSI_NL_TRANSPORT,
+				SCSI_NL_SHOST_VENDOR, len);
+	msg->vendor_id = vendor_id;
+	msg->host_no = host_no;
+	msg->vmsg_datalen = data_len;	/* bytes */
+	memcpy(&msg[1], data_buf, data_len);
+
+	err = nlmsg_unicast(scsi_nl_sock, skb, pid);
+	if (err)
+		/* nlmsg_multicast already kfree_skb'd */
+		goto send_vendor_fail;
+
+	return 0;
+
+send_vendor_fail_skb:
+	kfree_skb(skb);
+send_vendor_fail:
+	printk(KERN_WARNING
+		"%s: Dropped SCSI Msg : host %d vendor_unique - err %d\n",
+		__func__, host_no, err);
+	return err;
+}
+EXPORT_SYMBOL(scsi_nl_send_vendor_msg);
+
+
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 79f0f75..6cddd5d 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -4,6 +4,7 @@
 #include <linux/device.h>
 
 struct request_queue;
+struct request;
 struct scsi_cmnd;
 struct scsi_device;
 struct scsi_host_template;
@@ -27,7 +28,6 @@
 extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd);
 extern int scsi_setup_command_freelist(struct Scsi_Host *shost);
 extern void scsi_destroy_command_freelist(struct Scsi_Host *shost);
-extern void __scsi_done(struct scsi_cmnd *cmd);
 #ifdef CONFIG_SCSI_LOGGING
 void scsi_log_send(struct scsi_cmnd *cmd);
 void scsi_log_completion(struct scsi_cmnd *cmd, int disposition);
@@ -49,10 +49,7 @@
 extern void scsi_exit_devinfo(void);
 
 /* scsi_error.c */
-extern void scsi_add_timer(struct scsi_cmnd *, int,
-		void (*)(struct scsi_cmnd *));
-extern int scsi_delete_timer(struct scsi_cmnd *);
-extern void scsi_times_out(struct scsi_cmnd *cmd);
+extern enum blk_eh_timer_return scsi_times_out(struct request *req);
 extern int scsi_error_handler(void *host);
 extern int scsi_decide_disposition(struct scsi_cmnd *cmd);
 extern void scsi_eh_wakeup(struct Scsi_Host *shost);
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index c6a904a..82f7b2d 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -259,8 +259,8 @@
 	int error = -ENXIO;
 
 	shost = scsi_host_lookup(host);
-	if (IS_ERR(shost))
-		return PTR_ERR(shost);
+	if (!shost)
+		return error;
 
 	if (shost->transportt->user_scan)
 		error = shost->transportt->user_scan(shost, channel, id, lun);
@@ -287,8 +287,8 @@
 	int error = -ENXIO;
 
 	shost = scsi_host_lookup(host);
-	if (IS_ERR(shost))
-		return PTR_ERR(shost);
+	if (!shost)
+		return error;
 	sdev = scsi_device_lookup(shost, channel, id, lun);
 	if (sdev) {
 		scsi_remove_device(sdev);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 34d0de6..334862e 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -730,6 +730,8 @@
 static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
 		int *bflags, int async)
 {
+	int ret;
+
 	/*
 	 * XXX do not save the inquiry, since it can change underneath us,
 	 * save just vendor/model/rev.
@@ -885,7 +887,17 @@
 
 	/* set the device running here so that slave configure
 	 * may do I/O */
-	scsi_device_set_state(sdev, SDEV_RUNNING);
+	ret = scsi_device_set_state(sdev, SDEV_RUNNING);
+	if (ret) {
+		ret = scsi_device_set_state(sdev, SDEV_BLOCK);
+
+		if (ret) {
+			sdev_printk(KERN_ERR, sdev,
+				    "in wrong state %s to complete scan\n",
+				    scsi_device_state_name(sdev->sdev_state));
+			return SCSI_SCAN_NO_RESPONSE;
+		}
+	}
 
 	if (*bflags & BLIST_MS_192_BYTES_FOR_3F)
 		sdev->use_192_bytes_for_3f = 1;
@@ -899,7 +911,7 @@
 	transport_configure_device(&sdev->sdev_gendev);
 
 	if (sdev->host->hostt->slave_configure) {
-		int ret = sdev->host->hostt->slave_configure(sdev);
+		ret = sdev->host->hostt->slave_configure(sdev);
 		if (ret) {
 			/*
 			 * if LLDD reports slave not present, don't clutter
@@ -994,7 +1006,7 @@
 	 */
 	sdev = scsi_device_lookup_by_target(starget, lun);
 	if (sdev) {
-		if (rescan || sdev->sdev_state != SDEV_CREATED) {
+		if (rescan || !scsi_device_created(sdev)) {
 			SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO
 				"scsi scan: device exists on %s\n",
 				sdev->sdev_gendev.bus_id));
@@ -1467,7 +1479,7 @@
 	kfree(lun_data);
  out:
 	scsi_device_put(sdev);
-	if (sdev->sdev_state == SDEV_CREATED)
+	if (scsi_device_created(sdev))
 		/*
 		 * the sdev we used didn't appear in the report luns scan
 		 */
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index ab3c718..93c28f3 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -34,6 +34,7 @@
 	{ SDEV_QUIESCE, "quiesce" },
 	{ SDEV_OFFLINE,	"offline" },
 	{ SDEV_BLOCK,	"blocked" },
+	{ SDEV_CREATED_BLOCK, "created-blocked" },
 };
 
 const char *scsi_device_state_name(enum scsi_device_state state)
@@ -560,12 +561,15 @@
 sdev_rd_attr (model, "%.16s\n");
 sdev_rd_attr (rev, "%.4s\n");
 
+/*
+ * TODO: can we make these symlinks to the block layer ones?
+ */
 static ssize_t
 sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct scsi_device *sdev;
 	sdev = to_scsi_device(dev);
-	return snprintf (buf, 20, "%d\n", sdev->timeout / HZ);
+	return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ);
 }
 
 static ssize_t
@@ -576,7 +580,7 @@
 	int timeout;
 	sdev = to_scsi_device(dev);
 	sscanf (buf, "%d\n", &timeout);
-	sdev->timeout = timeout * HZ;
+	blk_queue_rq_timeout(sdev->request_queue, timeout * HZ);
 	return count;
 }
 static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout);
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index 257e097..48ba413 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -362,7 +362,7 @@
 	int err;
 
 	dprintk("%lx %u\n", uaddr, len);
-	err = blk_rq_map_user(q, rq, (void *)uaddr, len);
+	err = blk_rq_map_user(q, rq, NULL, (void *)uaddr, len, GFP_KERNEL);
 	if (err) {
 		/*
 		 * TODO: need to fixup sg_tablesize, max_segment_size,
@@ -460,7 +460,7 @@
 
 	/* TODO: replace with a O(1) alg */
 	shost = scsi_host_lookup(host_no);
-	if (IS_ERR(shost)) {
+	if (!shost) {
 		printk(KERN_ERR "Could not find host no %d\n", host_no);
 		return -EINVAL;
 	}
@@ -550,7 +550,7 @@
 	dprintk("%d %d %llx\n", host_no, result, (unsigned long long) mid);
 
 	shost = scsi_host_lookup(host_no);
-	if (IS_ERR(shost)) {
+	if (!shost) {
 		printk(KERN_ERR "Could not find host no %d\n", host_no);
 		return err;
 	}
@@ -603,7 +603,7 @@
 	dprintk("%d %d%llx\n", host_no, result, (unsigned long long)itn_id);
 
 	shost = scsi_host_lookup(host_no);
-	if (IS_ERR(shost)) {
+	if (!shost) {
 		printk(KERN_ERR "Could not find host no %d\n", host_no);
 		return err;
 	}
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 56823fd..d5f7653 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -40,31 +40,7 @@
 
 static int fc_queue_work(struct Scsi_Host *, struct work_struct *);
 static void fc_vport_sched_delete(struct work_struct *work);
-
-/*
- * This is a temporary carrier for creating a vport. It will eventually
- * be replaced  by a real message definition for sgio or netlink.
- *
- * fc_vport_identifiers: This set of data contains all elements
- * to uniquely identify and instantiate a FC virtual port.
- *
- * Notes:
- *   symbolic_name: The driver is to append the symbolic_name string data
- *      to the symbolic_node_name data that it generates by default.
- *      the resulting combination should then be registered with the switch.
- *      It is expected that things like Xen may stuff a VM title into
- *      this field.
- */
-struct fc_vport_identifiers {
-	u64 node_name;
-	u64 port_name;
-	u32 roles;
-	bool disable;
-	enum fc_port_type vport_type;	/* only FC_PORTTYPE_NPIV allowed */
-	char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN];
-};
-
-static int fc_vport_create(struct Scsi_Host *shost, int channel,
+static int fc_vport_setup(struct Scsi_Host *shost, int channel,
 	struct device *pdev, struct fc_vport_identifiers  *ids,
 	struct fc_vport **vport);
 
@@ -1760,7 +1736,7 @@
 	vid.disable = false;		/* always enabled */
 
 	/* we only allow support on Channel 0 !!! */
-	stat = fc_vport_create(shost, 0, &shost->shost_gendev, &vid, &vport);
+	stat = fc_vport_setup(shost, 0, &shost->shost_gendev, &vid, &vport);
 	return stat ? stat : count;
 }
 static FC_DEVICE_ATTR(host, vport_create, S_IWUSR, NULL,
@@ -1950,15 +1926,15 @@
  * Notes:
  *	This routine assumes no locks are held on entry.
  */
-static enum scsi_eh_timer_return
+static enum blk_eh_timer_return
 fc_timed_out(struct scsi_cmnd *scmd)
 {
 	struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device));
 
 	if (rport->port_state == FC_PORTSTATE_BLOCKED)
-		return EH_RESET_TIMER;
+		return BLK_EH_RESET_TIMER;
 
-	return EH_NOT_HANDLED;
+	return BLK_EH_NOT_HANDLED;
 }
 
 /*
@@ -3103,7 +3079,7 @@
 
 
 /**
- * fc_vport_create - allocates and creates a FC virtual port.
+ * fc_vport_setup - allocates and creates a FC virtual port.
  * @shost:	scsi host the virtual port is connected to.
  * @channel:	Channel on shost port connected to.
  * @pdev:	parent device for vport
@@ -3118,7 +3094,7 @@
  *	This routine assumes no locks are held on entry.
  */
 static int
-fc_vport_create(struct Scsi_Host *shost, int channel, struct device *pdev,
+fc_vport_setup(struct Scsi_Host *shost, int channel, struct device *pdev,
 	struct fc_vport_identifiers  *ids, struct fc_vport **ret_vport)
 {
 	struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
@@ -3231,6 +3207,28 @@
 	return error;
 }
 
+/**
+ * fc_vport_create - Admin App or LLDD requests creation of a vport
+ * @shost:	scsi host the virtual port is connected to.
+ * @channel:	channel on shost port connected to.
+ * @ids:	The world wide names, FC4 port roles, etc for
+ *              the virtual port.
+ *
+ * Notes:
+ *	This routine assumes no locks are held on entry.
+ */
+struct fc_vport *
+fc_vport_create(struct Scsi_Host *shost, int channel,
+	struct fc_vport_identifiers *ids)
+{
+	int stat;
+	struct fc_vport *vport;
+
+	stat = fc_vport_setup(shost, channel, &shost->shost_gendev,
+		 ids, &vport);
+	return stat ? NULL : vport;
+}
+EXPORT_SYMBOL(fc_vport_create);
 
 /**
  * fc_vport_terminate - Admin App or LLDD requests termination of a vport
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 043c392..0ce5f7c 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1361,7 +1361,7 @@
 		return -EINVAL;
 
 	shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no);
-	if (IS_ERR(shost)) {
+	if (!shost) {
 		printk(KERN_ERR "target discovery could not find host no %u\n",
 		       ev->u.tgt_dscvr.host_no);
 		return -ENODEV;
@@ -1387,7 +1387,7 @@
 		return -ENOSYS;
 
 	shost = scsi_host_lookup(ev->u.set_host_param.host_no);
-	if (IS_ERR(shost)) {
+	if (!shost) {
 		printk(KERN_ERR "set_host_param could not find host no %u\n",
 		       ev->u.set_host_param.host_no);
 		return -ENODEV;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e5e7d78..a7b53be 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -47,6 +47,7 @@
 #include <linux/blkpg.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/string_helpers.h>
 #include <asm/uaccess.h>
 
 #include <scsi/scsi.h>
@@ -86,6 +87,12 @@
 MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
 MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
 
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
+#define SD_MINORS	16
+#else
+#define SD_MINORS	0
+#endif
+
 static int  sd_revalidate_disk(struct gendisk *);
 static int  sd_probe(struct device *);
 static int  sd_remove(struct device *);
@@ -159,7 +166,7 @@
 			sd_print_sense_hdr(sdkp, &sshdr);
 		return -EINVAL;
 	}
-	sd_revalidate_disk(sdkp->disk);
+	revalidate_disk(sdkp->disk);
 	return count;
 }
 
@@ -377,7 +384,6 @@
 	sector_t block = rq->sector;
 	sector_t threshold;
 	unsigned int this_count = rq->nr_sectors;
-	unsigned int timeout = sdp->timeout;
 	int ret;
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -578,7 +584,6 @@
 	SCpnt->transfersize = sdp->sector_size;
 	SCpnt->underflow = this_count << 9;
 	SCpnt->allowed = SD_MAX_RETRIES;
-	SCpnt->timeout_per_command = timeout;
 
 	/*
 	 * This indicates that the command is ready from our end to be
@@ -910,7 +915,7 @@
 	struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
 
 	if (sdkp) {
-		sd_revalidate_disk(sdkp->disk);
+		revalidate_disk(sdkp->disk);
 		scsi_disk_put(sdkp);
 	}
 }
@@ -1429,27 +1434,21 @@
 		 */
 		sector_size = 512;
 	}
-	{
-		/*
-		 * The msdos fs needs to know the hardware sector size
-		 * So I have created this table. See ll_rw_blk.c
-		 * Jacques Gelinas (Jacques@solucorp.qc.ca)
-		 */
-		int hard_sector = sector_size;
-		sector_t sz = (sdkp->capacity/2) * (hard_sector/256);
-		struct request_queue *queue = sdp->request_queue;
-		sector_t mb = sz;
+	blk_queue_hardsect_size(sdp->request_queue, sector_size);
 
-		blk_queue_hardsect_size(queue, hard_sector);
-		/* avoid 64-bit division on 32-bit platforms */
-		sector_div(sz, 625);
-		mb -= sz - 974;
-		sector_div(mb, 1950);
+	{
+		char cap_str_2[10], cap_str_10[10];
+		u64 sz = sdkp->capacity << ffz(~sector_size);
+
+		string_get_size(sz, STRING_UNITS_2, cap_str_2,
+				sizeof(cap_str_2));
+		string_get_size(sz, STRING_UNITS_10, cap_str_10,
+				sizeof(cap_str_10));
 
 		sd_printk(KERN_NOTICE, sdkp,
-			  "%llu %d-byte hardware sectors (%llu MB)\n",
+			  "%llu %d-byte hardware sectors: (%s/%s)\n",
 			  (unsigned long long)sdkp->capacity,
-			  hard_sector, (unsigned long long)mb);
+			  sector_size, cap_str_10, cap_str_2);
 	}
 
 	/* Rescale capacity to 512-byte units */
@@ -1764,6 +1763,52 @@
 }
 
 /**
+ *	sd_format_disk_name - format disk name
+ *	@prefix: name prefix - ie. "sd" for SCSI disks
+ *	@index: index of the disk to format name for
+ *	@buf: output buffer
+ *	@buflen: length of the output buffer
+ *
+ *	SCSI disk names starts at sda.  The 26th device is sdz and the
+ *	27th is sdaa.  The last one for two lettered suffix is sdzz
+ *	which is followed by sdaaa.
+ *
+ *	This is basically 26 base counting with one extra 'nil' entry
+ *	at the beggining from the second digit on and can be
+ *	determined using similar method as 26 base conversion with the
+ *	index shifted -1 after each digit is computed.
+ *
+ *	CONTEXT:
+ *	Don't care.
+ *
+ *	RETURNS:
+ *	0 on success, -errno on failure.
+ */
+static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen)
+{
+	const int base = 'z' - 'a' + 1;
+	char *begin = buf + strlen(prefix);
+	char *end = buf + buflen;
+	char *p;
+	int unit;
+
+	p = end - 1;
+	*p = '\0';
+	unit = base;
+	do {
+		if (p == begin)
+			return -EINVAL;
+		*--p = 'a' + (index % unit);
+		index = (index / unit) - 1;
+	} while (index >= 0);
+
+	memmove(begin, p, end - p);
+	memcpy(buf, prefix, strlen(prefix));
+
+	return 0;
+}
+
+/**
  *	sd_probe - called during driver initialization and whenever a
  *	new scsi device is attached to the system. It is called once
  *	for each scsi device (not just disks) present.
@@ -1801,7 +1846,7 @@
 	if (!sdkp)
 		goto out;
 
-	gd = alloc_disk(16);
+	gd = alloc_disk(SD_MINORS);
 	if (!gd)
 		goto out_free;
 
@@ -1815,8 +1860,8 @@
 	if (error)
 		goto out_put;
 
-	error = -EBUSY;
-	if (index >= SD_MAX_DISKS)
+	error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
+	if (error)
 		goto out_free_index;
 
 	sdkp->device = sdp;
@@ -1826,11 +1871,12 @@
 	sdkp->openers = 0;
 	sdkp->previous_state = 1;
 
-	if (!sdp->timeout) {
+	if (!sdp->request_queue->rq_timeout) {
 		if (sdp->type != TYPE_MOD)
-			sdp->timeout = SD_TIMEOUT;
+			blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
 		else
-			sdp->timeout = SD_MOD_TIMEOUT;
+			blk_queue_rq_timeout(sdp->request_queue,
+					     SD_MOD_TIMEOUT);
 	}
 
 	device_initialize(&sdkp->dev);
@@ -1843,24 +1889,12 @@
 
 	get_device(&sdp->sdev_gendev);
 
-	gd->major = sd_major((index & 0xf0) >> 4);
-	gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
-	gd->minors = 16;
-	gd->fops = &sd_fops;
-
-	if (index < 26) {
-		sprintf(gd->disk_name, "sd%c", 'a' + index % 26);
-	} else if (index < (26 + 1) * 26) {
-		sprintf(gd->disk_name, "sd%c%c",
-			'a' + index / 26 - 1,'a' + index % 26);
-	} else {
-		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
-		const unsigned int m2 = (index / 26 - 1) % 26;
-		const unsigned int m3 =  index % 26;
-		sprintf(gd->disk_name, "sd%c%c%c",
-			'a' + m1, 'a' + m2, 'a' + m3);
+	if (index < SD_MAX_DISKS) {
+		gd->major = sd_major((index & 0xf0) >> 4);
+		gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
+		gd->minors = SD_MINORS;
 	}
-
+	gd->fops = &sd_fops;
 	gd->private_data = &sdkp->driver;
 	gd->queue = sdkp->device->request_queue;
 
@@ -1869,7 +1903,7 @@
 	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
 
 	gd->driverfs_dev = &sdp->sdev_gendev;
-	gd->flags = GENHD_FL_DRIVERFS;
+	gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
 	if (sdp->removable)
 		gd->flags |= GENHD_FL_REMOVABLE;
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 661f9f2..ba9b9bb 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -47,7 +47,6 @@
 #include <linux/seq_file.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
-#include <linux/scatterlist.h>
 #include <linux/blktrace_api.h>
 #include <linux/smp_lock.h>
 
@@ -69,7 +68,6 @@
 #endif
 
 #define SG_ALLOW_DIO_DEF 0
-#define SG_ALLOW_DIO_CODE /* compile out by commenting this define */
 
 #define SG_MAX_DEVS 32768
 
@@ -118,8 +116,8 @@
 	unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */
 	unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */
 	unsigned bufflen;	/* Size of (aggregate) data buffer */
-	unsigned b_malloc_len;	/* actual len malloc'ed in buffer */
-	struct scatterlist *buffer;/* scatter list */
+	struct page **pages;
+	int page_order;
 	char dio_in_use;	/* 0->indirect IO (or mmap), 1->dio */
 	unsigned char cmd_opcode; /* first byte of command */
 } Sg_scatter_hold;
@@ -137,6 +135,8 @@
 	char orphan;		/* 1 -> drop on sight, 0 -> normal */
 	char sg_io_owned;	/* 1 -> packet belongs to SG_IO */
 	volatile char done;	/* 0->before bh, 1->before read, 2->read */
+	struct request *rq;
+	struct bio *bio;
 } Sg_request;
 
 typedef struct sg_fd {		/* holds the state of a file descriptor */
@@ -175,8 +175,8 @@
 
 static int sg_fasync(int fd, struct file *filp, int mode);
 /* tasklet or soft irq callback */
-static void sg_cmd_done(void *data, char *sense, int result, int resid);
-static int sg_start_req(Sg_request * srp);
+static void sg_rq_end_io(struct request *rq, int uptodate);
+static int sg_start_req(Sg_request *srp, unsigned char *cmd);
 static void sg_finish_rem_req(Sg_request * srp);
 static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
 static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp,
@@ -188,17 +188,11 @@
 			int read_only, Sg_request **o_srp);
 static int sg_common_write(Sg_fd * sfp, Sg_request * srp,
 			   unsigned char *cmnd, int timeout, int blocking);
-static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
-		      int wr_xf, int *countp, unsigned char __user **up);
-static int sg_write_xfer(Sg_request * srp);
-static int sg_read_xfer(Sg_request * srp);
 static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer);
 static void sg_remove_scat(Sg_scatter_hold * schp);
 static void sg_build_reserve(Sg_fd * sfp, int req_size);
 static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
 static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
-static struct page *sg_page_malloc(int rqSz, int lowDma, int *retSzp);
-static void sg_page_free(struct page *page, int size);
 static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev);
 static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp);
 static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp);
@@ -206,7 +200,6 @@
 static Sg_request *sg_add_request(Sg_fd * sfp);
 static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
 static int sg_res_in_use(Sg_fd * sfp);
-static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len);
 static Sg_device *sg_get_dev(int dev);
 #ifdef CONFIG_SCSI_PROC_FS
 static int sg_last_dev(void);
@@ -529,8 +522,7 @@
 		err = -EFAULT;
 		goto err_out;
 	}
-	err = sg_read_xfer(srp);
-      err_out:
+err_out:
 	sg_finish_rem_req(srp);
 	return (0 == err) ? count : err;
 }
@@ -612,7 +604,10 @@
 	else
 		hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE;
 	hp->dxfer_len = mxsize;
-	hp->dxferp = (char __user *)buf + cmd_size;
+	if (hp->dxfer_direction == SG_DXFER_TO_DEV)
+		hp->dxferp = (char __user *)buf + cmd_size;
+	else
+		hp->dxferp = NULL;
 	hp->sbp = NULL;
 	hp->timeout = old_hdr.reply_len;	/* structure abuse ... */
 	hp->flags = input_size;	/* structure abuse ... */
@@ -732,16 +727,12 @@
 	SCSI_LOG_TIMEOUT(4, printk("sg_common_write:  scsi opcode=0x%02x, cmd_size=%d\n",
 			  (int) cmnd[0], (int) hp->cmd_len));
 
-	if ((k = sg_start_req(srp))) {
+	k = sg_start_req(srp, cmnd);
+	if (k) {
 		SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", k));
 		sg_finish_rem_req(srp);
 		return k;	/* probably out of space --> ENOMEM */
 	}
-	if ((k = sg_write_xfer(srp))) {
-		SCSI_LOG_TIMEOUT(1, printk("sg_common_write: write_xfer, bad address\n"));
-		sg_finish_rem_req(srp);
-		return k;
-	}
 	if (sdp->detached) {
 		sg_finish_rem_req(srp);
 		return -ENODEV;
@@ -763,20 +754,11 @@
 		break;
 	}
 	hp->duration = jiffies_to_msecs(jiffies);
-/* Now send everything of to mid-level. The next time we hear about this
-   packet is when sg_cmd_done() is called (i.e. a callback). */
-	if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer,
-				hp->dxfer_len, srp->data.k_use_sg, timeout,
-				SG_DEFAULT_RETRIES, srp, sg_cmd_done,
-				GFP_ATOMIC)) {
-		SCSI_LOG_TIMEOUT(1, printk("sg_common_write: scsi_execute_async failed\n"));
-		/*
-		 * most likely out of mem, but could also be a bad map
-		 */
-		sg_finish_rem_req(srp);
-		return -ENOMEM;
-	} else
-		return 0;
+
+	srp->rq->timeout = timeout;
+	blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk,
+			      srp->rq, 1, sg_rq_end_io);
+	return 0;
 }
 
 static int
@@ -1192,8 +1174,7 @@
 	Sg_fd *sfp;
 	unsigned long offset, len, sa;
 	Sg_scatter_hold *rsv_schp;
-	struct scatterlist *sg;
-	int k;
+	int k, length;
 
 	if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data)))
 		return VM_FAULT_SIGBUS;
@@ -1203,15 +1184,14 @@
 		return VM_FAULT_SIGBUS;
 	SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n",
 				   offset, rsv_schp->k_use_sg));
-	sg = rsv_schp->buffer;
 	sa = vma->vm_start;
-	for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
-	     ++k, sg = sg_next(sg)) {
+	length = 1 << (PAGE_SHIFT + rsv_schp->page_order);
+	for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) {
 		len = vma->vm_end - sa;
-		len = (len < sg->length) ? len : sg->length;
+		len = (len < length) ? len : length;
 		if (offset < len) {
-			struct page *page;
-			page = virt_to_page(page_address(sg_page(sg)) + offset);
+			struct page *page = nth_page(rsv_schp->pages[k],
+						     offset >> PAGE_SHIFT);
 			get_page(page);	/* increment page count */
 			vmf->page = page;
 			return 0; /* success */
@@ -1233,8 +1213,7 @@
 	Sg_fd *sfp;
 	unsigned long req_sz, len, sa;
 	Sg_scatter_hold *rsv_schp;
-	int k;
-	struct scatterlist *sg;
+	int k, length;
 
 	if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data)))
 		return -ENXIO;
@@ -1248,11 +1227,10 @@
 		return -ENOMEM;	/* cannot map more than reserved buffer */
 
 	sa = vma->vm_start;
-	sg = rsv_schp->buffer;
-	for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
-	     ++k, sg = sg_next(sg)) {
+	length = 1 << (PAGE_SHIFT + rsv_schp->page_order);
+	for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) {
 		len = vma->vm_end - sa;
-		len = (len < sg->length) ? len : sg->length;
+		len = (len < length) ? len : length;
 		sa += len;
 	}
 
@@ -1263,16 +1241,19 @@
 	return 0;
 }
 
-/* This function is a "bottom half" handler that is called by the
- * mid level when a command is completed (or has failed). */
-static void
-sg_cmd_done(void *data, char *sense, int result, int resid)
+/*
+ * This function is a "bottom half" handler that is called by the mid
+ * level when a command is completed (or has failed).
+ */
+static void sg_rq_end_io(struct request *rq, int uptodate)
 {
-	Sg_request *srp = data;
+	struct sg_request *srp = rq->end_io_data;
 	Sg_device *sdp = NULL;
 	Sg_fd *sfp;
 	unsigned long iflags;
 	unsigned int ms;
+	char *sense;
+	int result, resid;
 
 	if (NULL == srp) {
 		printk(KERN_ERR "sg_cmd_done: NULL request\n");
@@ -1286,6 +1267,9 @@
 		return;
 	}
 
+	sense = rq->sense;
+	result = rq->errors;
+	resid = rq->data_len;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n",
 		sdp->disk->disk_name, srp->header.pack_id, result));
@@ -1296,7 +1280,6 @@
 	if (0 != result) {
 		struct scsi_sense_hdr sshdr;
 
-		memcpy(srp->sense_b, sense, sizeof (srp->sense_b));
 		srp->header.status = 0xff & result;
 		srp->header.masked_status = status_byte(result);
 		srp->header.msg_status = msg_byte(result);
@@ -1634,37 +1617,79 @@
 	idr_destroy(&sg_index_idr);
 }
 
-static int
-sg_start_req(Sg_request * srp)
+static int sg_start_req(Sg_request *srp, unsigned char *cmd)
 {
 	int res;
+	struct request *rq;
 	Sg_fd *sfp = srp->parentfp;
 	sg_io_hdr_t *hp = &srp->header;
 	int dxfer_len = (int) hp->dxfer_len;
 	int dxfer_dir = hp->dxfer_direction;
+	unsigned int iov_count = hp->iovec_count;
 	Sg_scatter_hold *req_schp = &srp->data;
 	Sg_scatter_hold *rsv_schp = &sfp->reserve;
+	struct request_queue *q = sfp->parentdp->device->request_queue;
+	struct rq_map_data *md, map_data;
+	int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? WRITE : READ;
 
-	SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len));
+	SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n",
+				   dxfer_len));
+
+	rq = blk_get_request(q, rw, GFP_ATOMIC);
+	if (!rq)
+		return -ENOMEM;
+
+	memcpy(rq->cmd, cmd, hp->cmd_len);
+
+	rq->cmd_len = hp->cmd_len;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+
+	srp->rq = rq;
+	rq->end_io_data = srp;
+	rq->sense = srp->sense_b;
+	rq->retries = SG_DEFAULT_RETRIES;
+
 	if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE))
 		return 0;
-	if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) &&
-	    (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) &&
-	    (!sfp->parentdp->device->host->unchecked_isa_dma)) {
-		res = sg_build_direct(srp, sfp, dxfer_len);
-		if (res <= 0)	/* -ve -> error, 0 -> done, 1 -> try indirect */
-			return res;
+
+	if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO &&
+	    dxfer_dir != SG_DXFER_UNKNOWN && !iov_count &&
+	    !sfp->parentdp->device->host->unchecked_isa_dma &&
+	    blk_rq_aligned(q, hp->dxferp, dxfer_len))
+		md = NULL;
+	else
+		md = &map_data;
+
+	if (md) {
+		if (!sg_res_in_use(sfp) && dxfer_len <= rsv_schp->bufflen)
+			sg_link_reserve(sfp, srp, dxfer_len);
+		else {
+			res = sg_build_indirect(req_schp, sfp, dxfer_len);
+			if (res)
+				return res;
+		}
+
+		md->pages = req_schp->pages;
+		md->page_order = req_schp->page_order;
+		md->nr_entries = req_schp->k_use_sg;
 	}
-	if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen))
-		sg_link_reserve(sfp, srp, dxfer_len);
-	else {
-		res = sg_build_indirect(req_schp, sfp, dxfer_len);
-		if (res) {
-			sg_remove_scat(req_schp);
-			return res;
+
+	if (iov_count)
+		res = blk_rq_map_user_iov(q, rq, md, hp->dxferp, iov_count,
+					  hp->dxfer_len, GFP_ATOMIC);
+	else
+		res = blk_rq_map_user(q, rq, md, hp->dxferp,
+				      hp->dxfer_len, GFP_ATOMIC);
+
+	if (!res) {
+		srp->bio = rq->bio;
+
+		if (!md) {
+			req_schp->dio_in_use = 1;
+			hp->info |= SG_INFO_DIRECT_IO;
 		}
 	}
-	return 0;
+	return res;
 }
 
 static void
@@ -1678,186 +1703,37 @@
 		sg_unlink_reserve(sfp, srp);
 	else
 		sg_remove_scat(req_schp);
+
+	if (srp->rq) {
+		if (srp->bio)
+			blk_rq_unmap_user(srp->bio);
+
+		blk_put_request(srp->rq);
+	}
+
 	sg_remove_request(sfp, srp);
 }
 
 static int
 sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize)
 {
-	int sg_bufflen = tablesize * sizeof(struct scatterlist);
+	int sg_bufflen = tablesize * sizeof(struct page *);
 	gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
 
-	/*
-	 * TODO: test without low_dma, we should not need it since
-	 * the block layer will bounce the buffer for us
-	 *
-	 * XXX(hch): we shouldn't need GFP_DMA for the actual S/G list.
-	 */
-	if (sfp->low_dma)
-		 gfp_flags |= GFP_DMA;
-	schp->buffer = kzalloc(sg_bufflen, gfp_flags);
-	if (!schp->buffer)
+	schp->pages = kzalloc(sg_bufflen, gfp_flags);
+	if (!schp->pages)
 		return -ENOMEM;
-	sg_init_table(schp->buffer, tablesize);
 	schp->sglist_len = sg_bufflen;
 	return tablesize;	/* number of scat_gath elements allocated */
 }
 
-#ifdef SG_ALLOW_DIO_CODE
-/* vvvvvvvv  following code borrowed from st driver's direct IO vvvvvvvvv */
-	/* TODO: hopefully we can use the generic block layer code */
-
-/* Pin down user pages and put them into a scatter gather list. Returns <= 0 if
-   - mapping of all pages not successful
-   (i.e., either completely successful or fails)
-*/
-static int 
-st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, 
-	          unsigned long uaddr, size_t count, int rw)
-{
-	unsigned long end = (uaddr + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned long start = uaddr >> PAGE_SHIFT;
-	const int nr_pages = end - start;
-	int res, i, j;
-	struct page **pages;
-
-	/* User attempted Overflow! */
-	if ((uaddr + count) < uaddr)
-		return -EINVAL;
-
-	/* Too big */
-        if (nr_pages > max_pages)
-		return -ENOMEM;
-
-	/* Hmm? */
-	if (count == 0)
-		return 0;
-
-	if ((pages = kmalloc(max_pages * sizeof(*pages), GFP_ATOMIC)) == NULL)
-		return -ENOMEM;
-
-        /* Try to fault in all of the necessary pages */
-	down_read(&current->mm->mmap_sem);
-        /* rw==READ means read from drive, write into memory area */
-	res = get_user_pages(
-		current,
-		current->mm,
-		uaddr,
-		nr_pages,
-		rw == READ,
-		0, /* don't force */
-		pages,
-		NULL);
-	up_read(&current->mm->mmap_sem);
-
-	/* Errors and no page mapped should return here */
-	if (res < nr_pages)
-		goto out_unmap;
-
-        for (i=0; i < nr_pages; i++) {
-                /* FIXME: flush superflous for rw==READ,
-                 * probably wrong function for rw==WRITE
-                 */
-		flush_dcache_page(pages[i]);
-		/* ?? Is locking needed? I don't think so */
-		/* if (!trylock_page(pages[i]))
-		   goto out_unlock; */
-        }
-
-	sg_set_page(sgl, pages[0], 0, uaddr & ~PAGE_MASK);
-	if (nr_pages > 1) {
-		sgl[0].length = PAGE_SIZE - sgl[0].offset;
-		count -= sgl[0].length;
-		for (i=1; i < nr_pages ; i++)
-			sg_set_page(&sgl[i], pages[i], count < PAGE_SIZE ? count : PAGE_SIZE, 0);
-	}
-	else {
-		sgl[0].length = count;
-	}
-
-	kfree(pages);
-	return nr_pages;
-
- out_unmap:
-	if (res > 0) {
-		for (j=0; j < res; j++)
-			page_cache_release(pages[j]);
-		res = 0;
-	}
-	kfree(pages);
-	return res;
-}
-
-
-/* And unmap them... */
-static int 
-st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
-		    int dirtied)
-{
-	int i;
-
-	for (i=0; i < nr_pages; i++) {
-		struct page *page = sg_page(&sgl[i]);
-
-		if (dirtied)
-			SetPageDirty(page);
-		/* unlock_page(page); */
-		/* FIXME: cache flush missing for rw==READ
-		 * FIXME: call the correct reference counting function
-		 */
-		page_cache_release(page);
-	}
-
-	return 0;
-}
-
-/* ^^^^^^^^  above code borrowed from st driver's direct IO ^^^^^^^^^ */
-#endif
-
-
-/* Returns: -ve -> error, 0 -> done, 1 -> try indirect */
-static int
-sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len)
-{
-#ifdef SG_ALLOW_DIO_CODE
-	sg_io_hdr_t *hp = &srp->header;
-	Sg_scatter_hold *schp = &srp->data;
-	int sg_tablesize = sfp->parentdp->sg_tablesize;
-	int mx_sc_elems, res;
-	struct scsi_device *sdev = sfp->parentdp->device;
-
-	if (((unsigned long)hp->dxferp &
-			queue_dma_alignment(sdev->request_queue)) != 0)
-		return 1;
-
-	mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize);
-        if (mx_sc_elems <= 0) {
-                return 1;
-        }
-	res = st_map_user_pages(schp->buffer, mx_sc_elems,
-				(unsigned long)hp->dxferp, dxfer_len, 
-				(SG_DXFER_TO_DEV == hp->dxfer_direction) ? 1 : 0);
-	if (res <= 0) {
-		sg_remove_scat(schp);
-		return 1;
-	}
-	schp->k_use_sg = res;
-	schp->dio_in_use = 1;
-	hp->info |= SG_INFO_DIRECT_IO;
-	return 0;
-#else
-	return 1;
-#endif
-}
-
 static int
 sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
 {
-	struct scatterlist *sg;
-	int ret_sz = 0, k, rem_sz, num, mx_sc_elems;
+	int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems;
 	int sg_tablesize = sfp->parentdp->sg_tablesize;
-	int blk_size = buff_size;
-	struct page *p = NULL;
+	int blk_size = buff_size, order;
+	gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
 
 	if (blk_size < 0)
 		return -EFAULT;
@@ -1881,15 +1757,26 @@
 		} else
 			scatter_elem_sz_prev = num;
 	}
-	for (k = 0, sg = schp->buffer, rem_sz = blk_size;
-	     (rem_sz > 0) && (k < mx_sc_elems);
-	     ++k, rem_sz -= ret_sz, sg = sg_next(sg)) {
-		
+
+	if (sfp->low_dma)
+		gfp_mask |= GFP_DMA;
+
+	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+		gfp_mask |= __GFP_ZERO;
+
+	order = get_order(num);
+retry:
+	ret_sz = 1 << (PAGE_SHIFT + order);
+
+	for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems;
+	     k++, rem_sz -= ret_sz) {
+
 		num = (rem_sz > scatter_elem_sz_prev) ?
-		      scatter_elem_sz_prev : rem_sz;
-		p = sg_page_malloc(num, sfp->low_dma, &ret_sz);
-		if (!p)
-			return -ENOMEM;
+			scatter_elem_sz_prev : rem_sz;
+
+		schp->pages[k] = alloc_pages(gfp_mask, order);
+		if (!schp->pages[k])
+			goto out;
 
 		if (num == scatter_elem_sz_prev) {
 			if (unlikely(ret_sz > scatter_elem_sz_prev)) {
@@ -1897,12 +1784,12 @@
 				scatter_elem_sz_prev = ret_sz;
 			}
 		}
-		sg_set_page(sg, p, (ret_sz > num) ? num : ret_sz, 0);
 
 		SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, "
 				 "ret_sz=%d\n", k, num, ret_sz));
 	}		/* end of for loop */
 
+	schp->page_order = order;
 	schp->k_use_sg = k;
 	SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, "
 			 "rem_sz=%d\n", k, rem_sz));
@@ -1910,223 +1797,42 @@
 	schp->bufflen = blk_size;
 	if (rem_sz > 0)	/* must have failed */
 		return -ENOMEM;
-
 	return 0;
-}
+out:
+	for (i = 0; i < k; i++)
+		__free_pages(schp->pages[k], order);
 
-static int
-sg_write_xfer(Sg_request * srp)
-{
-	sg_io_hdr_t *hp = &srp->header;
-	Sg_scatter_hold *schp = &srp->data;
-	struct scatterlist *sg = schp->buffer;
-	int num_xfer = 0;
-	int j, k, onum, usglen, ksglen, res;
-	int iovec_count = (int) hp->iovec_count;
-	int dxfer_dir = hp->dxfer_direction;
-	unsigned char *p;
-	unsigned char __user *up;
-	int new_interface = ('\0' == hp->interface_id) ? 0 : 1;
+	if (--order >= 0)
+		goto retry;
 
-	if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) ||
-	    (SG_DXFER_TO_FROM_DEV == dxfer_dir)) {
-		num_xfer = (int) (new_interface ? hp->dxfer_len : hp->flags);
-		if (schp->bufflen < num_xfer)
-			num_xfer = schp->bufflen;
-	}
-	if ((num_xfer <= 0) || (schp->dio_in_use) ||
-	    (new_interface
-	     && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags)))
-		return 0;
-
-	SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n",
-			  num_xfer, iovec_count, schp->k_use_sg));
-	if (iovec_count) {
-		onum = iovec_count;
-		if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum))
-			return -EFAULT;
-	} else
-		onum = 1;
-
-	ksglen = sg->length;
-	p = page_address(sg_page(sg));
-	for (j = 0, k = 0; j < onum; ++j) {
-		res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up);
-		if (res)
-			return res;
-
-		for (; p; sg = sg_next(sg), ksglen = sg->length,
-		     p = page_address(sg_page(sg))) {
-			if (usglen <= 0)
-				break;
-			if (ksglen > usglen) {
-				if (usglen >= num_xfer) {
-					if (__copy_from_user(p, up, num_xfer))
-						return -EFAULT;
-					return 0;
-				}
-				if (__copy_from_user(p, up, usglen))
-					return -EFAULT;
-				p += usglen;
-				ksglen -= usglen;
-				break;
-			} else {
-				if (ksglen >= num_xfer) {
-					if (__copy_from_user(p, up, num_xfer))
-						return -EFAULT;
-					return 0;
-				}
-				if (__copy_from_user(p, up, ksglen))
-					return -EFAULT;
-				up += ksglen;
-				usglen -= ksglen;
-			}
-			++k;
-			if (k >= schp->k_use_sg)
-				return 0;
-		}
-	}
-
-	return 0;
-}
-
-static int
-sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
-	   int wr_xf, int *countp, unsigned char __user **up)
-{
-	int num_xfer = (int) hp->dxfer_len;
-	unsigned char __user *p = hp->dxferp;
-	int count;
-
-	if (0 == sg_num) {
-		if (wr_xf && ('\0' == hp->interface_id))
-			count = (int) hp->flags;	/* holds "old" input_size */
-		else
-			count = num_xfer;
-	} else {
-		sg_iovec_t iovec;
-		if (__copy_from_user(&iovec, p + ind*SZ_SG_IOVEC, SZ_SG_IOVEC))
-			return -EFAULT;
-		p = iovec.iov_base;
-		count = (int) iovec.iov_len;
-	}
-	if (!access_ok(wr_xf ? VERIFY_READ : VERIFY_WRITE, p, count))
-		return -EFAULT;
-	if (up)
-		*up = p;
-	if (countp)
-		*countp = count;
-	return 0;
+	return -ENOMEM;
 }
 
 static void
 sg_remove_scat(Sg_scatter_hold * schp)
 {
 	SCSI_LOG_TIMEOUT(4, printk("sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg));
-	if (schp->buffer && (schp->sglist_len > 0)) {
-		struct scatterlist *sg = schp->buffer;
-
-		if (schp->dio_in_use) {
-#ifdef SG_ALLOW_DIO_CODE
-			st_unmap_user_pages(sg, schp->k_use_sg, TRUE);
-#endif
-		} else {
+	if (schp->pages && schp->sglist_len > 0) {
+		if (!schp->dio_in_use) {
 			int k;
 
-			for (k = 0; (k < schp->k_use_sg) && sg_page(sg);
-			     ++k, sg = sg_next(sg)) {
+			for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) {
 				SCSI_LOG_TIMEOUT(5, printk(
-				    "sg_remove_scat: k=%d, pg=0x%p, len=%d\n",
-				    k, sg_page(sg), sg->length));
-				sg_page_free(sg_page(sg), sg->length);
+				    "sg_remove_scat: k=%d, pg=0x%p\n",
+				    k, schp->pages[k]));
+				__free_pages(schp->pages[k], schp->page_order);
 			}
+
+			kfree(schp->pages);
 		}
-		kfree(schp->buffer);
 	}
 	memset(schp, 0, sizeof (*schp));
 }
 
 static int
-sg_read_xfer(Sg_request * srp)
-{
-	sg_io_hdr_t *hp = &srp->header;
-	Sg_scatter_hold *schp = &srp->data;
-	struct scatterlist *sg = schp->buffer;
-	int num_xfer = 0;
-	int j, k, onum, usglen, ksglen, res;
-	int iovec_count = (int) hp->iovec_count;
-	int dxfer_dir = hp->dxfer_direction;
-	unsigned char *p;
-	unsigned char __user *up;
-	int new_interface = ('\0' == hp->interface_id) ? 0 : 1;
-
-	if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir)
-	    || (SG_DXFER_TO_FROM_DEV == dxfer_dir)) {
-		num_xfer = hp->dxfer_len;
-		if (schp->bufflen < num_xfer)
-			num_xfer = schp->bufflen;
-	}
-	if ((num_xfer <= 0) || (schp->dio_in_use) ||
-	    (new_interface
-	     && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags)))
-		return 0;
-
-	SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n",
-			  num_xfer, iovec_count, schp->k_use_sg));
-	if (iovec_count) {
-		onum = iovec_count;
-		if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum))
-			return -EFAULT;
-	} else
-		onum = 1;
-
-	p = page_address(sg_page(sg));
-	ksglen = sg->length;
-	for (j = 0, k = 0; j < onum; ++j) {
-		res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up);
-		if (res)
-			return res;
-
-		for (; p; sg = sg_next(sg), ksglen = sg->length,
-		     p = page_address(sg_page(sg))) {
-			if (usglen <= 0)
-				break;
-			if (ksglen > usglen) {
-				if (usglen >= num_xfer) {
-					if (__copy_to_user(up, p, num_xfer))
-						return -EFAULT;
-					return 0;
-				}
-				if (__copy_to_user(up, p, usglen))
-					return -EFAULT;
-				p += usglen;
-				ksglen -= usglen;
-				break;
-			} else {
-				if (ksglen >= num_xfer) {
-					if (__copy_to_user(up, p, num_xfer))
-						return -EFAULT;
-					return 0;
-				}
-				if (__copy_to_user(up, p, ksglen))
-					return -EFAULT;
-				up += ksglen;
-				usglen -= ksglen;
-			}
-			++k;
-			if (k >= schp->k_use_sg)
-				return 0;
-		}
-	}
-
-	return 0;
-}
-
-static int
 sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
 {
 	Sg_scatter_hold *schp = &srp->data;
-	struct scatterlist *sg = schp->buffer;
 	int k, num;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_read_oxfer: num_read_xfer=%d\n",
@@ -2134,15 +1840,15 @@
 	if ((!outp) || (num_read_xfer <= 0))
 		return 0;
 
-	for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) {
-		num = sg->length;
+	num = 1 << (PAGE_SHIFT + schp->page_order);
+	for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) {
 		if (num > num_read_xfer) {
-			if (__copy_to_user(outp, page_address(sg_page(sg)),
+			if (__copy_to_user(outp, page_address(schp->pages[k]),
 					   num_read_xfer))
 				return -EFAULT;
 			break;
 		} else {
-			if (__copy_to_user(outp, page_address(sg_page(sg)),
+			if (__copy_to_user(outp, page_address(schp->pages[k]),
 					   num))
 				return -EFAULT;
 			num_read_xfer -= num;
@@ -2177,24 +1883,21 @@
 {
 	Sg_scatter_hold *req_schp = &srp->data;
 	Sg_scatter_hold *rsv_schp = &sfp->reserve;
-	struct scatterlist *sg = rsv_schp->buffer;
 	int k, num, rem;
 
 	srp->res_used = 1;
 	SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size));
 	rem = size;
 
-	for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) {
-		num = sg->length;
+	num = 1 << (PAGE_SHIFT + rsv_schp->page_order);
+	for (k = 0; k < rsv_schp->k_use_sg; k++) {
 		if (rem <= num) {
-			sfp->save_scat_len = num;
-			sg->length = rem;
 			req_schp->k_use_sg = k + 1;
 			req_schp->sglist_len = rsv_schp->sglist_len;
-			req_schp->buffer = rsv_schp->buffer;
+			req_schp->pages = rsv_schp->pages;
 
 			req_schp->bufflen = size;
-			req_schp->b_malloc_len = rsv_schp->b_malloc_len;
+			req_schp->page_order = rsv_schp->page_order;
 			break;
 		} else
 			rem -= num;
@@ -2208,22 +1911,13 @@
 sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
 {
 	Sg_scatter_hold *req_schp = &srp->data;
-	Sg_scatter_hold *rsv_schp = &sfp->reserve;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_unlink_reserve: req->k_use_sg=%d\n",
 				   (int) req_schp->k_use_sg));
-	if ((rsv_schp->k_use_sg > 0) && (req_schp->k_use_sg > 0)) {
-		struct scatterlist *sg = rsv_schp->buffer;
-
-		if (sfp->save_scat_len > 0)
-			(sg + (req_schp->k_use_sg - 1))->length =
-			    (unsigned) sfp->save_scat_len;
-		else
-			SCSI_LOG_TIMEOUT(1, printk ("sg_unlink_reserve: BAD save_scat_len\n"));
-	}
 	req_schp->k_use_sg = 0;
 	req_schp->bufflen = 0;
-	req_schp->buffer = NULL;
+	req_schp->pages = NULL;
+	req_schp->page_order = 0;
 	req_schp->sglist_len = 0;
 	sfp->save_scat_len = 0;
 	srp->res_used = 0;
@@ -2481,53 +2175,6 @@
 	return srp ? 1 : 0;
 }
 
-/* The size fetched (value output via retSzp) set when non-NULL return */
-static struct page *
-sg_page_malloc(int rqSz, int lowDma, int *retSzp)
-{
-	struct page *resp = NULL;
-	gfp_t page_mask;
-	int order, a_size;
-	int resSz;
-
-	if ((rqSz <= 0) || (NULL == retSzp))
-		return resp;
-
-	if (lowDma)
-		page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN;
-	else
-		page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
-
-	for (order = 0, a_size = PAGE_SIZE; a_size < rqSz;
-	     order++, a_size <<= 1) ;
-	resSz = a_size;		/* rounded up if necessary */
-	resp = alloc_pages(page_mask, order);
-	while ((!resp) && order) {
-		--order;
-		a_size >>= 1;	/* divide by 2, until PAGE_SIZE */
-		resp =  alloc_pages(page_mask, order);	/* try half */
-		resSz = a_size;
-	}
-	if (resp) {
-		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
-			memset(page_address(resp), 0, resSz);
-		*retSzp = resSz;
-	}
-	return resp;
-}
-
-static void
-sg_page_free(struct page *page, int size)
-{
-	int order, a_size;
-
-	if (!page)
-		return;
-	for (order = 0, a_size = PAGE_SIZE; a_size < size;
-	     order++, a_size <<= 1) ;
-	__free_pages(page, order);
-}
-
 #ifdef CONFIG_SCSI_PROC_FS
 static int
 sg_idr_max_id(int id, void *p, void *data)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 27f5bfd..0f17009 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -331,7 +331,7 @@
 
 static int sr_prep_fn(struct request_queue *q, struct request *rq)
 {
-	int block=0, this_count, s_size, timeout = SR_TIMEOUT;
+	int block = 0, this_count, s_size;
 	struct scsi_cd *cd;
 	struct scsi_cmnd *SCpnt;
 	struct scsi_device *sdp = q->queuedata;
@@ -461,7 +461,6 @@
 	SCpnt->transfersize = cd->device->sector_size;
 	SCpnt->underflow = this_count << 9;
 	SCpnt->allowed = MAX_RETRIES;
-	SCpnt->timeout_per_command = timeout;
 
 	/*
 	 * This indicates that the command is ready from our end to be
@@ -620,6 +619,8 @@
 	disk->fops = &sr_bdops;
 	disk->flags = GENHD_FL_CD;
 
+	blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
+
 	cd->device = sdev;
 	cd->disk = disk;
 	cd->driver = &sr_template;
@@ -878,7 +879,7 @@
 	struct gendisk *disk = cd->disk;
 
 	spin_lock(&sr_index_lock);
-	clear_bit(disk->first_minor, sr_index_bits);
+	clear_bit(MINOR(disk_devt(disk)), sr_index_bits);
 	spin_unlock(&sr_index_lock);
 
 	unregister_cdrom(&cd->cdi);
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index d39107b..f4e6cde 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -519,8 +519,8 @@
 	 *  Shorten our settle_time if needed for 
 	 *  this command not to time out.
 	 */
-	if (np->s.settle_time_valid && cmd->timeout_per_command) {
-		unsigned long tlimit = jiffies + cmd->timeout_per_command;
+	if (np->s.settle_time_valid && cmd->request->timeout) {
+		unsigned long tlimit = jiffies + cmd->request->timeout;
 		tlimit -= SYM_CONF_TIMER_INTERVAL*2;
 		if (time_after(np->s.settle_time, tlimit)) {
 			np->s.settle_time = tlimit;
diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
index 1723d71..69ac6e5 100644
--- a/drivers/scsi/tmscsim.c
+++ b/drivers/scsi/tmscsim.c
@@ -2573,8 +2573,8 @@
 static int __init dc390_module_init(void)
 {
 	if (!disable_clustering)
-		printk(KERN_INFO "DC390: clustering now enabled by default. If you get problems load\n"
-		       "\twith \"disable_clustering=1\" and report to maintainers\n");
+		printk(KERN_INFO "DC390: clustering now enabled by default. If you get problems load\n");
+		printk(KERN_INFO "       with \"disable_clustering=1\" and report to maintainers\n");
 
 	if (tmscsim[0] == -1 || tmscsim[0] > 15) {
 		tmscsim[0] = 7;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c3e174b..19caf7c 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -107,7 +107,8 @@
 	BUG_ON(bip == NULL);
 
 	/* A cloned bio doesn't own the integrity metadata */
-	if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
+	if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
+	    && bip->bip_buf != NULL)
 		kfree(bip->bip_buf);
 
 	mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
@@ -150,6 +151,24 @@
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
+static int bdev_integrity_enabled(struct block_device *bdev, int rw)
+{
+	struct blk_integrity *bi = bdev_get_integrity(bdev);
+
+	if (bi == NULL)
+		return 0;
+
+	if (rw == READ && bi->verify_fn != NULL &&
+	    (bi->flags & INTEGRITY_FLAG_READ))
+		return 1;
+
+	if (rw == WRITE && bi->generate_fn != NULL &&
+	    (bi->flags & INTEGRITY_FLAG_WRITE))
+		return 1;
+
+	return 0;
+}
+
 /**
  * bio_integrity_enabled - Check whether integrity can be passed
  * @bio:	bio to check
@@ -313,6 +332,14 @@
 	}
 }
 
+static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
+{
+	if (bi)
+		return bi->tuple_size;
+
+	return 0;
+}
+
 /**
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
diff --git a/fs/bio.c b/fs/bio.c
index 3cba7ae..77a55bc 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
 
 static struct kmem_cache *bio_slab __read_mostly;
 
-mempool_t *bio_split_pool __read_mostly;
+static mempool_t *bio_split_pool __read_mostly;
 
 /*
  * if you change this list, also change bvec_alloc or things will
@@ -60,25 +60,46 @@
 	struct bio_vec *bvl;
 
 	/*
-	 * see comment near bvec_array define!
+	 * If 'bs' is given, lookup the pool and do the mempool alloc.
+	 * If not, this is a bio_kmalloc() allocation and just do a
+	 * kzalloc() for the exact number of vecs right away.
 	 */
-	switch (nr) {
-		case   1        : *idx = 0; break;
-		case   2 ...   4: *idx = 1; break;
-		case   5 ...  16: *idx = 2; break;
-		case  17 ...  64: *idx = 3; break;
-		case  65 ... 128: *idx = 4; break;
-		case 129 ... BIO_MAX_PAGES: *idx = 5; break;
+	if (bs) {
+		/*
+		 * see comment near bvec_array define!
+		 */
+		switch (nr) {
+		case 1:
+			*idx = 0;
+			break;
+		case 2 ... 4:
+			*idx = 1;
+			break;
+		case 5 ... 16:
+			*idx = 2;
+			break;
+		case 17 ... 64:
+			*idx = 3;
+			break;
+		case 65 ... 128:
+			*idx = 4;
+			break;
+		case 129 ... BIO_MAX_PAGES:
+			*idx = 5;
+			break;
 		default:
 			return NULL;
-	}
-	/*
-	 * idx now points to the pool we want to allocate from
-	 */
+		}
 
-	bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-	if (bvl)
-		memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+		/*
+		 * idx now points to the pool we want to allocate from
+		 */
+		bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
+		if (bvl)
+			memset(bvl, 0,
+				bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+	} else
+		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
 
 	return bvl;
 }
@@ -107,10 +128,17 @@
 	bio_free(bio, fs_bio_set);
 }
 
+static void bio_kmalloc_destructor(struct bio *bio)
+{
+	kfree(bio->bi_io_vec);
+	kfree(bio);
+}
+
 void bio_init(struct bio *bio)
 {
 	memset(bio, 0, sizeof(*bio));
 	bio->bi_flags = 1 << BIO_UPTODATE;
+	bio->bi_comp_cpu = -1;
 	atomic_set(&bio->bi_cnt, 1);
 }
 
@@ -118,19 +146,25 @@
  * bio_alloc_bioset - allocate a bio for I/O
  * @gfp_mask:   the GFP_ mask given to the slab allocator
  * @nr_iovecs:	number of iovecs to pre-allocate
- * @bs:		the bio_set to allocate from
+ * @bs:		the bio_set to allocate from. If %NULL, just use kmalloc
  *
  * Description:
- *   bio_alloc_bioset will first try it's on mempool to satisfy the allocation.
+ *   bio_alloc_bioset will first try its own mempool to satisfy the allocation.
  *   If %__GFP_WAIT is set then we will block on the internal pool waiting
- *   for a &struct bio to become free.
+ *   for a &struct bio to become free. If a %NULL @bs is passed in, we will
+ *   fall back to just using @kmalloc to allocate the required memory.
  *
  *   allocate bio and iovecs from the memory pools specified by the
- *   bio_set structure.
+ *   bio_set structure, or @kmalloc if none given.
  **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
+	struct bio *bio;
+
+	if (bs)
+		bio = mempool_alloc(bs->bio_pool, gfp_mask);
+	else
+		bio = kmalloc(sizeof(*bio), gfp_mask);
 
 	if (likely(bio)) {
 		struct bio_vec *bvl = NULL;
@@ -141,7 +175,10 @@
 
 			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
 			if (unlikely(!bvl)) {
-				mempool_free(bio, bs->bio_pool);
+				if (bs)
+					mempool_free(bio, bs->bio_pool);
+				else
+					kfree(bio);
 				bio = NULL;
 				goto out;
 			}
@@ -164,6 +201,23 @@
 	return bio;
 }
 
+/*
+ * Like bio_alloc(), but doesn't use a mempool backing. This means that
+ * it CAN fail, but while bio_alloc() can only be used for allocations
+ * that have a short (finite) life span, bio_kmalloc() should be used
+ * for more permanent bio allocations (like allocating some bio's for
+ * initalization or setup purposes).
+ */
+struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
+{
+	struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+
+	if (bio)
+		bio->bi_destructor = bio_kmalloc_destructor;
+
+	return bio;
+}
+
 void zero_fill_bio(struct bio *bio)
 {
 	unsigned long flags;
@@ -208,14 +262,6 @@
 	return bio->bi_phys_segments;
 }
 
-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
-{
-	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
-		blk_recount_segments(q, bio);
-
-	return bio->bi_hw_segments;
-}
-
 /**
  * 	__bio_clone	-	clone a bio
  * 	@bio: destination bio
@@ -350,8 +396,7 @@
 	 */
 
 	while (bio->bi_phys_segments >= q->max_phys_segments
-	       || bio->bi_hw_segments >= q->max_hw_segments
-	       || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
+	       || bio->bi_phys_segments >= q->max_hw_segments) {
 
 		if (retried_segments)
 			return 0;
@@ -395,13 +440,11 @@
 	}
 
 	/* If we may be able to merge these biovecs, force a recount */
-	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
-	    BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
+	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;
-	bio->bi_hw_segments++;
  done:
 	bio->bi_size += len;
 	return len;
@@ -449,16 +492,19 @@
 
 struct bio_map_data {
 	struct bio_vec *iovecs;
-	int nr_sgvecs;
 	struct sg_iovec *sgvecs;
+	int nr_sgvecs;
+	int is_our_pages;
 };
 
 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
-			     struct sg_iovec *iov, int iov_count)
+			     struct sg_iovec *iov, int iov_count,
+			     int is_our_pages)
 {
 	memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
 	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
 	bmd->nr_sgvecs = iov_count;
+	bmd->is_our_pages = is_our_pages;
 	bio->bi_private = bmd;
 }
 
@@ -493,7 +539,8 @@
 }
 
 static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
-			  struct sg_iovec *iov, int iov_count, int uncopy)
+			  struct sg_iovec *iov, int iov_count, int uncopy,
+			  int do_free_page)
 {
 	int ret = 0, i;
 	struct bio_vec *bvec;
@@ -536,7 +583,7 @@
 			}
 		}
 
-		if (uncopy)
+		if (do_free_page)
 			__free_page(bvec->bv_page);
 	}
 
@@ -553,10 +600,11 @@
 int bio_uncopy_user(struct bio *bio)
 {
 	struct bio_map_data *bmd = bio->bi_private;
-	int ret;
+	int ret = 0;
 
-	ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
-
+	if (!bio_flagged(bio, BIO_NULL_MAPPED))
+		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+				     bmd->nr_sgvecs, 1, bmd->is_our_pages);
 	bio_free_map_data(bmd);
 	bio_put(bio);
 	return ret;
@@ -565,16 +613,20 @@
 /**
  *	bio_copy_user_iov	-	copy user data to bio
  *	@q: destination block queue
+ *	@map_data: pointer to the rq_map_data holding pages (if necessary)
  *	@iov:	the iovec.
  *	@iov_count: number of elements in the iovec
  *	@write_to_vm: bool indicating writing to pages or not
+ *	@gfp_mask: memory allocation flags
  *
  *	Prepares and returns a bio for indirect user io, bouncing data
  *	to/from kernel pages as necessary. Must be paired with
  *	call bio_uncopy_user() on io completion.
  */
-struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
-			      int iov_count, int write_to_vm)
+struct bio *bio_copy_user_iov(struct request_queue *q,
+			      struct rq_map_data *map_data,
+			      struct sg_iovec *iov, int iov_count,
+			      int write_to_vm, gfp_t gfp_mask)
 {
 	struct bio_map_data *bmd;
 	struct bio_vec *bvec;
@@ -597,25 +649,38 @@
 		len += iov[i].iov_len;
 	}
 
-	bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
+	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
 	ret = -ENOMEM;
-	bio = bio_alloc(GFP_KERNEL, nr_pages);
+	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
 		goto out_bmd;
 
 	bio->bi_rw |= (!write_to_vm << BIO_RW);
 
 	ret = 0;
+	i = 0;
 	while (len) {
-		unsigned int bytes = PAGE_SIZE;
+		unsigned int bytes;
+
+		if (map_data)
+			bytes = 1U << (PAGE_SHIFT + map_data->page_order);
+		else
+			bytes = PAGE_SIZE;
 
 		if (bytes > len)
 			bytes = len;
 
-		page = alloc_page(q->bounce_gfp | GFP_KERNEL);
+		if (map_data) {
+			if (i == map_data->nr_entries) {
+				ret = -ENOMEM;
+				break;
+			}
+			page = map_data->pages[i++];
+		} else
+			page = alloc_page(q->bounce_gfp | gfp_mask);
 		if (!page) {
 			ret = -ENOMEM;
 			break;
@@ -634,16 +699,17 @@
 	 * success
 	 */
 	if (!write_to_vm) {
-		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
+		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
 		if (ret)
 			goto cleanup;
 	}
 
-	bio_set_map_data(bmd, bio, iov, iov_count);
+	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
 	return bio;
 cleanup:
-	bio_for_each_segment(bvec, bio, i)
-		__free_page(bvec->bv_page);
+	if (!map_data)
+		bio_for_each_segment(bvec, bio, i)
+			__free_page(bvec->bv_page);
 
 	bio_put(bio);
 out_bmd:
@@ -654,29 +720,32 @@
 /**
  *	bio_copy_user	-	copy user data to bio
  *	@q: destination block queue
+ *	@map_data: pointer to the rq_map_data holding pages (if necessary)
  *	@uaddr: start of user address
  *	@len: length in bytes
  *	@write_to_vm: bool indicating writing to pages or not
+ *	@gfp_mask: memory allocation flags
  *
  *	Prepares and returns a bio for indirect user io, bouncing data
  *	to/from kernel pages as necessary. Must be paired with
  *	call bio_uncopy_user() on io completion.
  */
-struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
-			  unsigned int len, int write_to_vm)
+struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
+			  unsigned long uaddr, unsigned int len,
+			  int write_to_vm, gfp_t gfp_mask)
 {
 	struct sg_iovec iov;
 
 	iov.iov_base = (void __user *)uaddr;
 	iov.iov_len = len;
 
-	return bio_copy_user_iov(q, &iov, 1, write_to_vm);
+	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
 }
 
 static struct bio *__bio_map_user_iov(struct request_queue *q,
 				      struct block_device *bdev,
 				      struct sg_iovec *iov, int iov_count,
-				      int write_to_vm)
+				      int write_to_vm, gfp_t gfp_mask)
 {
 	int i, j;
 	int nr_pages = 0;
@@ -702,12 +771,12 @@
 	if (!nr_pages)
 		return ERR_PTR(-EINVAL);
 
-	bio = bio_alloc(GFP_KERNEL, nr_pages);
+	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
 	ret = -ENOMEM;
-	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
 	if (!pages)
 		goto out;
 
@@ -786,19 +855,21 @@
  *	@uaddr: start of user address
  *	@len: length in bytes
  *	@write_to_vm: bool indicating writing to pages or not
+ *	@gfp_mask: memory allocation flags
  *
  *	Map the user space address into a bio suitable for io to a block
  *	device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
-			 unsigned long uaddr, unsigned int len, int write_to_vm)
+			 unsigned long uaddr, unsigned int len, int write_to_vm,
+			 gfp_t gfp_mask)
 {
 	struct sg_iovec iov;
 
 	iov.iov_base = (void __user *)uaddr;
 	iov.iov_len = len;
 
-	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
+	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
 }
 
 /**
@@ -808,18 +879,19 @@
  *	@iov:	the iovec.
  *	@iov_count: number of elements in the iovec
  *	@write_to_vm: bool indicating writing to pages or not
+ *	@gfp_mask: memory allocation flags
  *
  *	Map the user space address into a bio suitable for io to a block
  *	device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
 			     struct sg_iovec *iov, int iov_count,
-			     int write_to_vm)
+			     int write_to_vm, gfp_t gfp_mask)
 {
 	struct bio *bio;
 
-	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
-
+	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
+				 gfp_mask);
 	if (IS_ERR(bio))
 		return bio;
 
@@ -976,48 +1048,13 @@
 struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 			  gfp_t gfp_mask, int reading)
 {
-	unsigned long kaddr = (unsigned long)data;
-	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned long start = kaddr >> PAGE_SHIFT;
-	const int nr_pages = end - start;
 	struct bio *bio;
 	struct bio_vec *bvec;
-	struct bio_map_data *bmd;
-	int i, ret;
-	struct sg_iovec iov;
+	int i;
 
-	iov.iov_base = data;
-	iov.iov_len = len;
-
-	bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
-	if (!bmd)
-		return ERR_PTR(-ENOMEM);
-
-	ret = -ENOMEM;
-	bio = bio_alloc(gfp_mask, nr_pages);
-	if (!bio)
-		goto out_bmd;
-
-	while (len) {
-		struct page *page;
-		unsigned int bytes = PAGE_SIZE;
-
-		if (bytes > len)
-			bytes = len;
-
-		page = alloc_page(q->bounce_gfp | gfp_mask);
-		if (!page) {
-			ret = -ENOMEM;
-			goto cleanup;
-		}
-
-		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
-			ret = -EINVAL;
-			goto cleanup;
-		}
-
-		len -= bytes;
-	}
+	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
+	if (IS_ERR(bio))
+		return bio;
 
 	if (!reading) {
 		void *p = data;
@@ -1030,20 +1067,9 @@
 		}
 	}
 
-	bio->bi_private = bmd;
 	bio->bi_end_io = bio_copy_kern_endio;
 
-	bio_set_map_data(bmd, bio, &iov, 1);
 	return bio;
-cleanup:
-	bio_for_each_segment(bvec, bio, i)
-		__free_page(bvec->bv_page);
-
-	bio_put(bio);
-out_bmd:
-	bio_free_map_data(bmd);
-
-	return ERR_PTR(ret);
 }
 
 /*
@@ -1230,9 +1256,9 @@
  * split a bio - only worry about a bio with a single page
  * in it's iovec
  */
-struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
+struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 {
-	struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO);
+	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
 
 	if (!bp)
 		return bp;
@@ -1266,7 +1292,7 @@
 	bp->bio2.bi_end_io = bio_pair_end_2;
 
 	bp->bio1.bi_private = bi;
-	bp->bio2.bi_private = pool;
+	bp->bio2.bi_private = bio_split_pool;
 
 	if (bio_integrity(bi))
 		bio_integrity_split(bi, bp, first_sectors);
@@ -1274,6 +1300,42 @@
 	return bp;
 }
 
+/**
+ *      bio_sector_offset - Find hardware sector offset in bio
+ *      @bio:           bio to inspect
+ *      @index:         bio_vec index
+ *      @offset:        offset in bv_page
+ *
+ *      Return the number of hardware sectors between beginning of bio
+ *      and an end point indicated by a bio_vec index and an offset
+ *      within that vector's page.
+ */
+sector_t bio_sector_offset(struct bio *bio, unsigned short index,
+			   unsigned int offset)
+{
+	unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+	struct bio_vec *bv;
+	sector_t sectors;
+	int i;
+
+	sectors = 0;
+
+	if (index >= bio->bi_idx)
+		index = bio->bi_vcnt - 1;
+
+	__bio_for_each_segment(bv, bio, i, 0) {
+		if (i == index) {
+			if (offset > bv->bv_offset)
+				sectors += (offset - bv->bv_offset) / sector_sz;
+			break;
+		}
+
+		sectors += bv->bv_len / sector_sz;
+	}
+
+	return sectors;
+}
+EXPORT_SYMBOL(bio_sector_offset);
 
 /*
  * create memory pools for biovec's in a bio_set.
@@ -1376,6 +1438,7 @@
 subsys_initcall(init_bio);
 
 EXPORT_SYMBOL(bio_alloc);
+EXPORT_SYMBOL(bio_kmalloc);
 EXPORT_SYMBOL(bio_put);
 EXPORT_SYMBOL(bio_free);
 EXPORT_SYMBOL(bio_endio);
@@ -1383,7 +1446,6 @@
 EXPORT_SYMBOL(__bio_clone);
 EXPORT_SYMBOL(bio_clone);
 EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
@@ -1393,7 +1455,6 @@
 EXPORT_SYMBOL(bio_copy_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
-EXPORT_SYMBOL(bio_split_pool);
 EXPORT_SYMBOL(bio_copy_user);
 EXPORT_SYMBOL(bio_uncopy_user);
 EXPORT_SYMBOL(bioset_create);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aff5421..d84f0469 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -540,22 +540,6 @@
  *           /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
  */
 
-static struct kobject *bdev_get_kobj(struct block_device *bdev)
-{
-	if (bdev->bd_contains != bdev)
-		return kobject_get(&bdev->bd_part->dev.kobj);
-	else
-		return kobject_get(&bdev->bd_disk->dev.kobj);
-}
-
-static struct kobject *bdev_get_holder(struct block_device *bdev)
-{
-	if (bdev->bd_contains != bdev)
-		return kobject_get(bdev->bd_part->holder_dir);
-	else
-		return kobject_get(bdev->bd_disk->holder_dir);
-}
-
 static int add_symlink(struct kobject *from, struct kobject *to)
 {
 	if (!from || !to)
@@ -604,11 +588,11 @@
 	if (!bo->hdev)
 		goto fail_put_sdir;
 
-	bo->sdev = bdev_get_kobj(bdev);
+	bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
 	if (!bo->sdev)
 		goto fail_put_hdev;
 
-	bo->hdir = bdev_get_holder(bdev);
+	bo->hdir = kobject_get(bdev->bd_part->holder_dir);
 	if (!bo->hdir)
 		goto fail_put_sdev;
 
@@ -868,6 +852,87 @@
 
 EXPORT_SYMBOL(open_by_devnum);
 
+/**
+ * flush_disk - invalidates all buffer-cache entries on a disk
+ *
+ * @bdev:      struct block device to be flushed
+ *
+ * Invalidates all buffer-cache entries on a disk. It should be called
+ * when a disk has been changed -- either by a media change or online
+ * resize.
+ */
+static void flush_disk(struct block_device *bdev)
+{
+	if (__invalidate_device(bdev)) {
+		char name[BDEVNAME_SIZE] = "";
+
+		if (bdev->bd_disk)
+			disk_name(bdev->bd_disk, 0, name);
+		printk(KERN_WARNING "VFS: busy inodes on changed media or "
+		       "resized disk %s\n", name);
+	}
+
+	if (!bdev->bd_disk)
+		return;
+	if (disk_partitionable(bdev->bd_disk))
+		bdev->bd_invalidated = 1;
+}
+
+/**
+ * check_disk_size_change - checks for disk size change and adjusts bdev size.
+ * @disk: struct gendisk to check
+ * @bdev: struct bdev to adjust.
+ *
+ * This routine checks to see if the bdev size does not match the disk size
+ * and adjusts it if it differs.
+ */
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
+{
+	loff_t disk_size, bdev_size;
+
+	disk_size = (loff_t)get_capacity(disk) << 9;
+	bdev_size = i_size_read(bdev->bd_inode);
+	if (disk_size != bdev_size) {
+		char name[BDEVNAME_SIZE];
+
+		disk_name(disk, 0, name);
+		printk(KERN_INFO
+		       "%s: detected capacity change from %lld to %lld\n",
+		       name, bdev_size, disk_size);
+		i_size_write(bdev->bd_inode, disk_size);
+		flush_disk(bdev);
+	}
+}
+EXPORT_SYMBOL(check_disk_size_change);
+
+/**
+ * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
+ * @disk: struct gendisk to be revalidated
+ *
+ * This routine is a wrapper for lower-level driver's revalidate_disk
+ * call-backs.  It is used to do common pre and post operations needed
+ * for all revalidate_disk operations.
+ */
+int revalidate_disk(struct gendisk *disk)
+{
+	struct block_device *bdev;
+	int ret = 0;
+
+	if (disk->fops->revalidate_disk)
+		ret = disk->fops->revalidate_disk(disk);
+
+	bdev = bdget_disk(disk, 0);
+	if (!bdev)
+		return ret;
+
+	mutex_lock(&bdev->bd_mutex);
+	check_disk_size_change(disk, bdev);
+	mutex_unlock(&bdev->bd_mutex);
+	bdput(bdev);
+	return ret;
+}
+EXPORT_SYMBOL(revalidate_disk);
+
 /*
  * This routine checks whether a removable media has been changed,
  * and invalidates all buffer-cache-entries in that case. This
@@ -887,13 +952,9 @@
 	if (!bdops->media_changed(bdev->bd_disk))
 		return 0;
 
-	if (__invalidate_device(bdev))
-		printk("VFS: busy inodes on changed media.\n");
-
+	flush_disk(bdev);
 	if (bdops->revalidate_disk)
 		bdops->revalidate_disk(bdev->bd_disk);
-	if (bdev->bd_disk->minors > 1)
-		bdev->bd_invalidated = 1;
 	return 1;
 }
 
@@ -927,10 +988,10 @@
 
 static int do_open(struct block_device *bdev, struct file *file, int for_part)
 {
-	struct module *owner = NULL;
 	struct gendisk *disk;
+	struct hd_struct *part = NULL;
 	int ret;
-	int part;
+	int partno;
 	int perm = 0;
 
 	if (file->f_mode & FMODE_READ)
@@ -948,25 +1009,27 @@
 
 	ret = -ENXIO;
 	file->f_mapping = bdev->bd_inode->i_mapping;
+
 	lock_kernel();
-	disk = get_gendisk(bdev->bd_dev, &part);
-	if (!disk) {
-		unlock_kernel();
-		bdput(bdev);
-		return ret;
-	}
-	owner = disk->fops->owner;
+
+	disk = get_gendisk(bdev->bd_dev, &partno);
+	if (!disk)
+		goto out_unlock_kernel;
+	part = disk_get_part(disk, partno);
+	if (!part)
+		goto out_unlock_kernel;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
+		bdev->bd_part = part;
 		bdev->bd_contains = bdev;
-		if (!part) {
+		if (!partno) {
 			struct backing_dev_info *bdi;
 			if (disk->fops->open) {
 				ret = disk->fops->open(bdev->bd_inode, file);
 				if (ret)
-					goto out_first;
+					goto out_clear;
 			}
 			if (!bdev->bd_openers) {
 				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
@@ -978,36 +1041,36 @@
 			if (bdev->bd_invalidated)
 				rescan_partitions(disk, bdev);
 		} else {
-			struct hd_struct *p;
 			struct block_device *whole;
 			whole = bdget_disk(disk, 0);
 			ret = -ENOMEM;
 			if (!whole)
-				goto out_first;
+				goto out_clear;
 			BUG_ON(for_part);
 			ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1);
 			if (ret)
-				goto out_first;
+				goto out_clear;
 			bdev->bd_contains = whole;
-			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
 			   whole->bd_inode->i_data.backing_dev_info;
-			if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
+			if (!(disk->flags & GENHD_FL_UP) ||
+			    !part || !part->nr_sects) {
 				ret = -ENXIO;
-				goto out_first;
+				goto out_clear;
 			}
-			kobject_get(&p->dev.kobj);
-			bdev->bd_part = p;
-			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+			bd_set_size(bdev, (loff_t)part->nr_sects << 9);
 		}
 	} else {
+		disk_put_part(part);
 		put_disk(disk);
-		module_put(owner);
+		module_put(disk->fops->owner);
+		part = NULL;
+		disk = NULL;
 		if (bdev->bd_contains == bdev) {
 			if (bdev->bd_disk->fops->open) {
 				ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
 				if (ret)
-					goto out;
+					goto out_unlock_bdev;
 			}
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
@@ -1020,19 +1083,24 @@
 	unlock_kernel();
 	return 0;
 
-out_first:
+ out_clear:
 	bdev->bd_disk = NULL;
+	bdev->bd_part = NULL;
 	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 	if (bdev != bdev->bd_contains)
 		__blkdev_put(bdev->bd_contains, 1);
 	bdev->bd_contains = NULL;
-	put_disk(disk);
-	module_put(owner);
-out:
+ out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
+ out_unlock_kernel:
 	unlock_kernel();
-	if (ret)
-		bdput(bdev);
+
+	disk_put_part(part);
+	if (disk)
+		module_put(disk->fops->owner);
+	put_disk(disk);
+	bdput(bdev);
+
 	return ret;
 }
 
@@ -1117,11 +1185,8 @@
 
 		put_disk(disk);
 		module_put(owner);
-
-		if (bdev->bd_contains != bdev) {
-			kobject_put(&bdev->bd_part->dev.kobj);
-			bdev->bd_part = NULL;
-		}
+		disk_put_part(bdev->bd_part);
+		bdev->bd_part = NULL;
 		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 		if (bdev != bdev->bd_contains)
@@ -1197,10 +1262,9 @@
 
 /**
  * lookup_bdev  - lookup a struct block_device by name
+ * @pathname:	special file representing the block device
  *
- * @path:	special file representing the block device
- *
- * Get a reference to the blockdevice at @path in the current
+ * Get a reference to the blockdevice at @pathname in the current
  * namespace if possible and return it.  Return ERR_PTR(error)
  * otherwise.
  */
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 302e95c..fb98b3d 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/msdos_fs.h>
+#include <linux/blkdev.h>
 
 struct fatent_operations {
 	void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -535,6 +536,7 @@
 	struct fat_entry fatent;
 	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
 	int i, err, nr_bhs;
+	int first_cl = cluster;
 
 	nr_bhs = 0;
 	fatent_init(&fatent);
@@ -551,6 +553,18 @@
 			goto error;
 		}
 
+		/* 
+		 * Issue discard for the sectors we no longer care about,
+		 * batching contiguous clusters into one request
+		 */
+		if (cluster != fatent.entry + 1) {
+			int nr_clus = fatent.entry - first_cl + 1;
+
+			sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl),
+					 nr_clus * sbi->sec_per_clus);
+			first_cl = cluster;
+		}
+
 		ops->ent_put(&fatent, FAT_ENT_FREE);
 		if (sbi->free_clusters != -1) {
 			sbi->free_clusters++;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 13391e5..c962283 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1265,6 +1265,8 @@
 	holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
 	if (time_before(now, holdtime))
 		delay = holdtime - now;
+	if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
+		delay = gl->gl_ops->go_min_hold_time;
 
 	spin_lock(&gl->gl_spin);
 	handle_callback(gl, state, 1, delay);
@@ -1578,8 +1580,6 @@
 		*p++ = 'a';
 	if (flags & GL_EXACT)
 		*p++ = 'E';
-	if (flags & GL_ATIME)
-		*p++ = 'a';
 	if (flags & GL_NOCACHE)
 		*p++ = 'c';
 	if (test_bit(HIF_HOLDER, &iflags))
@@ -1816,15 +1816,17 @@
 	if (gl) {
 		gi->gl = hlist_entry(gl->gl_list.next,
 				     struct gfs2_glock, gl_list);
-		if (gi->gl)
-			gfs2_glock_hold(gi->gl);
+	} else {
+		gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
+				     struct gfs2_glock, gl_list);
 	}
+	if (gi->gl)
+		gfs2_glock_hold(gi->gl);
 	read_unlock(gl_lock_addr(gi->hash));
 	if (gl)
 		gfs2_glock_put(gl);
-	if (gl && gi->gl == NULL)
-		gi->hash++;
 	while (gi->gl == NULL) {
+		gi->hash++;
 		if (gi->hash >= GFS2_GL_HASH_SIZE)
 			return 1;
 		read_lock(gl_lock_addr(gi->hash));
@@ -1833,7 +1835,6 @@
 		if (gi->gl)
 			gfs2_glock_hold(gi->gl);
 		read_unlock(gl_lock_addr(gi->hash));
-		gi->hash++;
 	}
 
 	if (gi->sdp != gi->gl->gl_sbd)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 971d92a..695c6b1 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -24,7 +24,6 @@
 #define GL_ASYNC		0x00000040
 #define GL_EXACT		0x00000080
 #define GL_SKIP			0x00000100
-#define GL_ATIME		0x00000200
 #define GL_NOCACHE		0x00000400
 
 #define GLR_TRYFAILED		13
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 448697a..f566ec1 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -386,20 +386,21 @@
 #define GFS2_DATA_ORDERED	2
 
 struct gfs2_args {
-	char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
-	char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
-	char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
-	int ar_spectator; /* Don't get a journal because we're always RO */
-	int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
-	int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
-	int ar_localcaching; /* Local-style caching (dangerous on multihost) */
-	int ar_debug; /* Oops on errors instead of trying to be graceful */
-	int ar_upgrade; /* Upgrade ondisk/multihost format */
-	unsigned int ar_num_glockd; /* Number of glockd threads */
-	int ar_posix_acl; /* Enable posix acls */
-	int ar_quota; /* off/account/on */
-	int ar_suiddir; /* suiddir support */
-	int ar_data; /* ordered/writeback */
+	char ar_lockproto[GFS2_LOCKNAME_LEN];	/* Name of the Lock Protocol */
+	char ar_locktable[GFS2_LOCKNAME_LEN];	/* Name of the Lock Table */
+	char ar_hostdata[GFS2_LOCKNAME_LEN];	/* Host specific data */
+	unsigned int ar_spectator:1;		/* Don't get a journal */
+	unsigned int ar_ignore_local_fs:1;	/* Ignore optimisations */
+	unsigned int ar_localflocks:1;		/* Let the VFS do flock|fcntl */
+	unsigned int ar_localcaching:1;		/* Local caching */
+	unsigned int ar_debug:1;		/* Oops on errors */
+	unsigned int ar_upgrade:1;		/* Upgrade ondisk format */
+	unsigned int ar_posix_acl:1;		/* Enable posix acls */
+	unsigned int ar_quota:2;		/* off/account/on */
+	unsigned int ar_suiddir:1;		/* suiddir support */
+	unsigned int ar_data:2;			/* ordered/writeback */
+	unsigned int ar_meta:1;			/* mount metafs */
+	unsigned int ar_num_glockd;		/* Number of glockd threads */
 };
 
 struct gfs2_tune {
@@ -419,7 +420,6 @@
 	unsigned int gt_quota_scale_den; /* Denominator */
 	unsigned int gt_quota_cache_secs;
 	unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
-	unsigned int gt_atime_quantum; /* Min secs between atime updates */
 	unsigned int gt_new_files_jdata;
 	unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
 	unsigned int gt_stall_secs; /* Detects trouble! */
@@ -432,7 +432,7 @@
 	SDF_JOURNAL_CHECKED	= 0,
 	SDF_JOURNAL_LIVE	= 1,
 	SDF_SHUTDOWN		= 2,
-	SDF_NOATIME		= 3,
+	SDF_NOBARRIERS		= 3,
 };
 
 #define GFS2_FSNAME_LEN		256
@@ -461,7 +461,6 @@
 
 struct gfs2_sbd {
 	struct super_block *sd_vfs;
-	struct super_block *sd_vfs_meta;
 	struct kobject sd_kobj;
 	unsigned long sd_flags;	/* SDF_... */
 	struct gfs2_sb_host sd_sb;
@@ -499,7 +498,9 @@
 
 	/* Inode Stuff */
 
-	struct inode *sd_master_dir;
+	struct dentry *sd_master_dir;
+	struct dentry *sd_root_dir;
+
 	struct inode *sd_jindex;
 	struct inode *sd_inum_inode;
 	struct inode *sd_statfs_inode;
@@ -634,7 +635,6 @@
 	/* Debugging crud */
 
 	unsigned long sd_last_warning;
-	struct vfsmount *sd_gfs2mnt;
 	struct dentry *debugfs_dir;    /* debugfs directory */
 	struct dentry *debugfs_dentry_glocks; /* for debugfs */
 };
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8b0806a..7cee695 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -18,6 +18,7 @@
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
 #include <linux/security.h>
+#include <linux/time.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -249,6 +250,7 @@
 {
 	struct gfs2_dinode_host *di = &ip->i_di;
 	const struct gfs2_dinode *str = buf;
+	struct timespec atime;
 	u16 height, depth;
 
 	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
@@ -275,8 +277,10 @@
 	di->di_size = be64_to_cpu(str->di_size);
 	i_size_write(&ip->i_inode, di->di_size);
 	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
-	ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
-	ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+	atime.tv_sec = be64_to_cpu(str->di_atime);
+	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+	if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
+		ip->i_inode.i_atime = atime;
 	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
 	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
 	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
@@ -1033,13 +1037,11 @@
 
 	if (bh)
 		brelse(bh);
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
 	return inode;
 
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
-	if (inode)
+	if (inode && !IS_ERR(inode))
 		iput(inode);
 fail_gunlock:
 	gfs2_glock_dq(ghs);
@@ -1140,54 +1142,6 @@
 	return 0;
 }
 
-/*
- * gfs2_ok_to_move - check if it's ok to move a directory to another directory
- * @this: move this
- * @to: to here
- *
- * Follow @to back to the root and make sure we don't encounter @this
- * Assumes we already hold the rename lock.
- *
- * Returns: errno
- */
-
-int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
-{
-	struct inode *dir = &to->i_inode;
-	struct super_block *sb = dir->i_sb;
-	struct inode *tmp;
-	struct qstr dotdot;
-	int error = 0;
-
-	gfs2_str2qstr(&dotdot, "..");
-
-	igrab(dir);
-
-	for (;;) {
-		if (dir == &this->i_inode) {
-			error = -EINVAL;
-			break;
-		}
-		if (dir == sb->s_root->d_inode) {
-			error = 0;
-			break;
-		}
-
-		tmp = gfs2_lookupi(dir, &dotdot, 1);
-		if (IS_ERR(tmp)) {
-			error = PTR_ERR(tmp);
-			break;
-		}
-
-		iput(dir);
-		dir = tmp;
-	}
-
-	iput(dir);
-
-	return error;
-}
-
 /**
  * gfs2_readlinki - return the contents of a symlink
  * @ip: the symlink's inode
@@ -1207,8 +1161,8 @@
 	unsigned int x;
 	int error;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
-	error = gfs2_glock_nq_atime(&i_gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+	error = gfs2_glock_nq(&i_gh);
 	if (error) {
 		gfs2_holder_uninit(&i_gh);
 		return error;
@@ -1243,101 +1197,6 @@
 	return error;
 }
 
-/**
- * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
- *       conditionally update the inode's atime
- * @gh: the holder to acquire
- *
- * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
- * Update if the difference between the current time and the inode's current
- * atime is greater than an interval specified at mount.
- *
- * Returns: errno
- */
-
-int gfs2_glock_nq_atime(struct gfs2_holder *gh)
-{
-	struct gfs2_glock *gl = gh->gh_gl;
-	struct gfs2_sbd *sdp = gl->gl_sbd;
-	struct gfs2_inode *ip = gl->gl_object;
-	s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
-	unsigned int state;
-	int flags;
-	int error;
-	struct timespec tv = CURRENT_TIME;
-
-	if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
-	    gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
-	    gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
-		return -EINVAL;
-
-	state = gh->gh_state;
-	flags = gh->gh_flags;
-
-	error = gfs2_glock_nq(gh);
-	if (error)
-		return error;
-
-	if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
-	    (sdp->sd_vfs->s_flags & MS_RDONLY))
-		return 0;
-
-	if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
-		gfs2_glock_dq(gh);
-		gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
-				   gh);
-		error = gfs2_glock_nq(gh);
-		if (error)
-			return error;
-
-		/* Verify that atime hasn't been updated while we were
-		   trying to get exclusive lock. */
-
-		tv = CURRENT_TIME;
-		if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
-			struct buffer_head *dibh;
-			struct gfs2_dinode *di;
-
-			error = gfs2_trans_begin(sdp, RES_DINODE, 0);
-			if (error == -EROFS)
-				return 0;
-			if (error)
-				goto fail;
-
-			error = gfs2_meta_inode_buffer(ip, &dibh);
-			if (error)
-				goto fail_end_trans;
-
-			ip->i_inode.i_atime = tv;
-
-			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-			di = (struct gfs2_dinode *)dibh->b_data;
-			di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-			di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
-			brelse(dibh);
-
-			gfs2_trans_end(sdp);
-		}
-
-		/* If someone else has asked for the glock,
-		   unlock and let them have it. Then reacquire
-		   in the original state. */
-		if (gfs2_glock_is_blocking(gl)) {
-			gfs2_glock_dq(gh);
-			gfs2_holder_reinit(state, flags, gh);
-			return gfs2_glock_nq(gh);
-		}
-	}
-
-	return 0;
-
-fail_end_trans:
-	gfs2_trans_end(sdp);
-fail:
-	gfs2_glock_dq(gh);
-	return error;
-}
-
 static int
 __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 58f9607..2d43f69 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -91,9 +91,7 @@
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 		   const struct gfs2_inode *ip);
 int gfs2_permission(struct inode *inode, int mask);
-int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
-int gfs2_glock_nq_atime(struct gfs2_holder *gh);
 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6c6af9f..ad30585 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/bio.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -584,7 +585,6 @@
 	memset(bh->b_data, 0, bh->b_size);
 	set_buffer_uptodate(bh);
 	clear_buffer_dirty(bh);
-	unlock_buffer(bh);
 
 	gfs2_ail1_empty(sdp, 0);
 	tail = current_tail(sdp);
@@ -601,8 +601,23 @@
 	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
 	lh->lh_hash = cpu_to_be32(hash);
 
-	set_buffer_dirty(bh);
-	if (sync_dirty_buffer(bh))
+	bh->b_end_io = end_buffer_write_sync;
+	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
+		goto skip_barrier;
+	get_bh(bh);
+	submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh);
+	wait_on_buffer(bh);
+	if (buffer_eopnotsupp(bh)) {
+		clear_buffer_eopnotsupp(bh);
+		set_buffer_uptodate(bh);
+		set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+		lock_buffer(bh);
+skip_barrier:
+		get_bh(bh);
+		submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
+		wait_on_buffer(bh);
+	}
+	if (!buffer_uptodate(bh))
 		gfs2_io_error_bh(sdp, bh);
 	brelse(bh);
 
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index b941f9f..df48333 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -42,6 +42,7 @@
 	Opt_nosuiddir,
 	Opt_data_writeback,
 	Opt_data_ordered,
+	Opt_meta,
 	Opt_err,
 };
 
@@ -66,6 +67,7 @@
 	{Opt_nosuiddir, "nosuiddir"},
 	{Opt_data_writeback, "data=writeback"},
 	{Opt_data_ordered, "data=ordered"},
+	{Opt_meta, "meta"},
 	{Opt_err, NULL}
 };
 
@@ -239,6 +241,11 @@
 		case Opt_data_ordered:
 			args->ar_data = GFS2_DATA_ORDERED;
 			break;
+		case Opt_meta:
+			if (remount && args->ar_meta != 1)
+				goto cant_remount;
+			args->ar_meta = 1;
+			break;
 		case Opt_err:
 		default:
 			fs_info(sdp, "unknown option: %s\n", o);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index e64a1b0..2756381 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -512,8 +512,8 @@
 	int error;
 
 	unlock_page(page);
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-	error = gfs2_glock_nq_atime(&gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	error = gfs2_glock_nq(&gh);
 	if (unlikely(error))
 		goto out;
 	error = AOP_TRUNCATED_PAGE;
@@ -594,8 +594,8 @@
 	struct gfs2_holder gh;
 	int ret;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-	ret = gfs2_glock_nq_atime(&gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
 	if (unlikely(ret))
 		goto out_uninit;
 	if (!gfs2_is_stuffed(ip))
@@ -636,8 +636,8 @@
 	unsigned to = from + len;
 	struct page *page;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
-	error = gfs2_glock_nq_atime(&ip->i_gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+	error = gfs2_glock_nq(&ip->i_gh);
 	if (unlikely(error))
 		goto out_uninit;
 
@@ -975,7 +975,7 @@
 	if (gfs2_is_stuffed(ip))
 		return 0;
 
-	if (offset > i_size_read(&ip->i_inode))
+	if (offset >= i_size_read(&ip->i_inode))
 		return 0;
 	return 1;
 }
@@ -1000,8 +1000,8 @@
 	 * unfortunately have the option of only flushing a range like
 	 * the VFS does.
 	 */
-	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
-	rv = gfs2_glock_nq_atime(&gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
+	rv = gfs2_glock_nq(&gh);
 	if (rv)
 		return rv;
 	rv = gfs2_ok_for_dio(ip, rw, offset);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index e9a366d..3a747f8 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -89,8 +89,8 @@
 	u64 offset = file->f_pos;
 	int error;
 
-	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
-	error = gfs2_glock_nq_atime(&d_gh);
+	gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+	error = gfs2_glock_nq(&d_gh);
 	if (error) {
 		gfs2_holder_uninit(&d_gh);
 		return error;
@@ -153,8 +153,8 @@
 	int error;
 	u32 fsflags;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-	error = gfs2_glock_nq_atime(&gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	error = gfs2_glock_nq(&gh);
 	if (error)
 		return error;
 
@@ -351,8 +351,8 @@
 	struct gfs2_alloc *al;
 	int ret;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh);
-	ret = gfs2_glock_nq_atime(&gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
 	if (ret)
 		goto out;
 
@@ -434,8 +434,8 @@
 	struct gfs2_holder i_gh;
 	int error;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
-	error = gfs2_glock_nq_atime(&i_gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+	error = gfs2_glock_nq(&i_gh);
 	if (error) {
 		gfs2_holder_uninit(&i_gh);
 		return error;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b4d1d64..b117fcf 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -40,6 +40,44 @@
 #define DO 0
 #define UNDO 1
 
+static const u32 gfs2_old_fs_formats[] = {
+        0
+};
+
+static const u32 gfs2_old_multihost_formats[] = {
+        0
+};
+
+/**
+ * gfs2_tune_init - Fill a gfs2_tune structure with default values
+ * @gt: tune
+ *
+ */
+
+static void gfs2_tune_init(struct gfs2_tune *gt)
+{
+	spin_lock_init(&gt->gt_spin);
+
+	gt->gt_demote_secs = 300;
+	gt->gt_incore_log_blocks = 1024;
+	gt->gt_log_flush_secs = 60;
+	gt->gt_recoverd_secs = 60;
+	gt->gt_logd_secs = 1;
+	gt->gt_quotad_secs = 5;
+	gt->gt_quota_simul_sync = 64;
+	gt->gt_quota_warn_period = 10;
+	gt->gt_quota_scale_num = 1;
+	gt->gt_quota_scale_den = 1;
+	gt->gt_quota_cache_secs = 300;
+	gt->gt_quota_quantum = 60;
+	gt->gt_new_files_jdata = 0;
+	gt->gt_max_readahead = 1 << 18;
+	gt->gt_stall_secs = 600;
+	gt->gt_complain_secs = 10;
+	gt->gt_statfs_quantum = 30;
+	gt->gt_statfs_slow = 0;
+}
+
 static struct gfs2_sbd *init_sbd(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp;
@@ -96,21 +134,271 @@
 	return sdp;
 }
 
-static void init_vfs(struct super_block *sb, unsigned noatime)
+
+/**
+ * gfs2_check_sb - Check superblock
+ * @sdp: the filesystem
+ * @sb: The superblock
+ * @silent: Don't print a message if the check fails
+ *
+ * Checks the version code of the FS is one that we understand how to
+ * read and that the sizes of the various on-disk structures have not
+ * changed.
+ */
+
+static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
-	struct gfs2_sbd *sdp = sb->s_fs_info;
+	unsigned int x;
 
-	sb->s_magic = GFS2_MAGIC;
-	sb->s_op = &gfs2_super_ops;
-	sb->s_export_op = &gfs2_export_ops;
-	sb->s_time_gran = 1;
-	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	if (sb->sb_magic != GFS2_MAGIC ||
+	    sb->sb_type != GFS2_METATYPE_SB) {
+		if (!silent)
+			printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
+		return -EINVAL;
+	}
 
-	if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
-		set_bit(noatime, &sdp->sd_flags);
+	/*  If format numbers match exactly, we're done.  */
 
-	/* Don't let the VFS update atimes.  GFS2 handles this itself. */
-	sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
+	if (sb->sb_fs_format == GFS2_FORMAT_FS &&
+	    sb->sb_multihost_format == GFS2_FORMAT_MULTI)
+		return 0;
+
+	if (sb->sb_fs_format != GFS2_FORMAT_FS) {
+		for (x = 0; gfs2_old_fs_formats[x]; x++)
+			if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
+				break;
+
+		if (!gfs2_old_fs_formats[x]) {
+			printk(KERN_WARNING
+			       "GFS2: code version (%u, %u) is incompatible "
+			       "with ondisk format (%u, %u)\n",
+			       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+			       sb->sb_fs_format, sb->sb_multihost_format);
+			printk(KERN_WARNING
+			       "GFS2: I don't know how to upgrade this FS\n");
+			return -EINVAL;
+		}
+	}
+
+	if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+		for (x = 0; gfs2_old_multihost_formats[x]; x++)
+			if (gfs2_old_multihost_formats[x] ==
+			    sb->sb_multihost_format)
+				break;
+
+		if (!gfs2_old_multihost_formats[x]) {
+			printk(KERN_WARNING
+			       "GFS2: code version (%u, %u) is incompatible "
+			       "with ondisk format (%u, %u)\n",
+			       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+			       sb->sb_fs_format, sb->sb_multihost_format);
+			printk(KERN_WARNING
+			       "GFS2: I don't know how to upgrade this FS\n");
+			return -EINVAL;
+		}
+	}
+
+	if (!sdp->sd_args.ar_upgrade) {
+		printk(KERN_WARNING
+		       "GFS2: code version (%u, %u) is incompatible "
+		       "with ondisk format (%u, %u)\n",
+		       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+		       sb->sb_fs_format, sb->sb_multihost_format);
+		printk(KERN_INFO
+		       "GFS2: Use the \"upgrade\" mount option to upgrade "
+		       "the FS\n");
+		printk(KERN_INFO "GFS2: See the manual for more details\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void end_bio_io_page(struct bio *bio, int error)
+{
+	struct page *page = bio->bi_private;
+
+	if (!error)
+		SetPageUptodate(page);
+	else
+		printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
+	unlock_page(page);
+}
+
+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+	const struct gfs2_sb *str = buf;
+
+	sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
+	sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
+	sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
+	sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
+	sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
+	sb->sb_bsize = be32_to_cpu(str->sb_bsize);
+	sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
+	sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
+	sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
+	sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
+	sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
+
+	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
+	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
+}
+
+/**
+ * gfs2_read_super - Read the gfs2 super block from disk
+ * @sdp: The GFS2 super block
+ * @sector: The location of the super block
+ * @error: The error code to return
+ *
+ * This uses the bio functions to read the super block from disk
+ * because we want to be 100% sure that we never read cached data.
+ * A super block is read twice only during each GFS2 mount and is
+ * never written to by the filesystem. The first time its read no
+ * locks are held, and the only details which are looked at are those
+ * relating to the locking protocol. Once locking is up and working,
+ * the sb is read again under the lock to establish the location of
+ * the master directory (contains pointers to journals etc) and the
+ * root directory.
+ *
+ * Returns: 0 on success or error
+ */
+
+static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
+{
+	struct super_block *sb = sdp->sd_vfs;
+	struct gfs2_sb *p;
+	struct page *page;
+	struct bio *bio;
+
+	page = alloc_page(GFP_NOFS);
+	if (unlikely(!page))
+		return -ENOBUFS;
+
+	ClearPageUptodate(page);
+	ClearPageDirty(page);
+	lock_page(page);
+
+	bio = bio_alloc(GFP_NOFS, 1);
+	if (unlikely(!bio)) {
+		__free_page(page);
+		return -ENOBUFS;
+	}
+
+	bio->bi_sector = sector * (sb->s_blocksize >> 9);
+	bio->bi_bdev = sb->s_bdev;
+	bio_add_page(bio, page, PAGE_SIZE, 0);
+
+	bio->bi_end_io = end_bio_io_page;
+	bio->bi_private = page;
+	submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
+	wait_on_page_locked(page);
+	bio_put(bio);
+	if (!PageUptodate(page)) {
+		__free_page(page);
+		return -EIO;
+	}
+	p = kmap(page);
+	gfs2_sb_in(&sdp->sd_sb, p);
+	kunmap(page);
+	__free_page(page);
+	return 0;
+}
+/**
+ * gfs2_read_sb - Read super block
+ * @sdp: The GFS2 superblock
+ * @gl: the glock for the superblock (assumed to be held)
+ * @silent: Don't print message if mount fails
+ *
+ */
+
+static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
+{
+	u32 hash_blocks, ind_blocks, leaf_blocks;
+	u32 tmp_blocks;
+	unsigned int x;
+	int error;
+
+	error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+	if (error) {
+		if (!silent)
+			fs_err(sdp, "can't read superblock\n");
+		return error;
+	}
+
+	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
+	if (error)
+		return error;
+
+	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
+			       GFS2_BASIC_BLOCK_SHIFT;
+	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
+			  sizeof(struct gfs2_dinode)) / sizeof(u64);
+	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
+			  sizeof(struct gfs2_meta_header)) / sizeof(u64);
+	sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
+	sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
+	sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
+	sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
+	sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
+				sizeof(struct gfs2_meta_header)) /
+			        sizeof(struct gfs2_quota_change);
+
+	/* Compute maximum reservation required to add a entry to a directory */
+
+	hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
+			     sdp->sd_jbsize);
+
+	ind_blocks = 0;
+	for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
+		tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
+		ind_blocks += tmp_blocks;
+	}
+
+	leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
+
+	sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
+
+	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
+				sizeof(struct gfs2_dinode);
+	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
+	for (x = 2;; x++) {
+		u64 space, d;
+		u32 m;
+
+		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
+		d = space;
+		m = do_div(d, sdp->sd_inptrs);
+
+		if (d != sdp->sd_heightsize[x - 1] || m)
+			break;
+		sdp->sd_heightsize[x] = space;
+	}
+	sdp->sd_max_height = x;
+	sdp->sd_heightsize[x] = ~0;
+	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
+
+	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
+				 sizeof(struct gfs2_dinode);
+	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
+	for (x = 2;; x++) {
+		u64 space, d;
+		u32 m;
+
+		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
+		d = space;
+		m = do_div(d, sdp->sd_inptrs);
+
+		if (d != sdp->sd_jheightsize[x - 1] || m)
+			break;
+		sdp->sd_jheightsize[x] = space;
+	}
+	sdp->sd_max_jheight = x;
+	sdp->sd_jheightsize[x] = ~0;
+	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
+
+	return 0;
 }
 
 static int init_names(struct gfs2_sbd *sdp, int silent)
@@ -224,51 +512,59 @@
 	return error;
 }
 
-static inline struct inode *gfs2_lookup_root(struct super_block *sb,
-					     u64 no_addr)
+static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
+			    u64 no_addr, const char *name)
 {
-	return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
+	struct gfs2_sbd *sdp = sb->s_fs_info;
+	struct dentry *dentry;
+	struct inode *inode;
+
+	inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
+	if (IS_ERR(inode)) {
+		fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
+		return PTR_ERR(inode);
+	}
+	dentry = d_alloc_root(inode);
+	if (!dentry) {
+		fs_err(sdp, "can't alloc %s dentry\n", name);
+		iput(inode);
+		return -ENOMEM;
+	}
+	dentry->d_op = &gfs2_dops;
+	*dptr = dentry;
+	return 0;
 }
 
-static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
+static int init_sb(struct gfs2_sbd *sdp, int silent)
 {
 	struct super_block *sb = sdp->sd_vfs;
 	struct gfs2_holder sb_gh;
 	u64 no_addr;
-	struct inode *inode;
-	int error = 0;
+	int ret;
 
-	if (undo) {
-		if (sb->s_root) {
-			dput(sb->s_root);
-			sb->s_root = NULL;
-		}
-		return 0;
+	ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
+				LM_ST_SHARED, 0, &sb_gh);
+	if (ret) {
+		fs_err(sdp, "can't acquire superblock glock: %d\n", ret);
+		return ret;
 	}
 
-	error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
-				 LM_ST_SHARED, 0, &sb_gh);
-	if (error) {
-		fs_err(sdp, "can't acquire superblock glock: %d\n", error);
-		return error;
-	}
-
-	error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
-	if (error) {
-		fs_err(sdp, "can't read superblock: %d\n", error);
+	ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
+	if (ret) {
+		fs_err(sdp, "can't read superblock: %d\n", ret);
 		goto out;
 	}
 
 	/* Set up the buffer cache and SB for real */
 	if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
-		error = -EINVAL;
+		ret = -EINVAL;
 		fs_err(sdp, "FS block size (%u) is too small for device "
 		       "block size (%u)\n",
 		       sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
 		goto out;
 	}
 	if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
-		error = -EINVAL;
+		ret = -EINVAL;
 		fs_err(sdp, "FS block size (%u) is too big for machine "
 		       "page size (%u)\n",
 		       sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
@@ -278,26 +574,21 @@
 
 	/* Get the root inode */
 	no_addr = sdp->sd_sb.sb_root_dir.no_addr;
-	if (sb->s_type == &gfs2meta_fs_type)
-		no_addr = sdp->sd_sb.sb_master_dir.no_addr;
-	inode = gfs2_lookup_root(sb, no_addr);
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
-		fs_err(sdp, "can't read in root inode: %d\n", error);
+	ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root");
+	if (ret)
+		goto out;
+
+	/* Get the master inode */
+	no_addr = sdp->sd_sb.sb_master_dir.no_addr;
+	ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master");
+	if (ret) {
+		dput(sdp->sd_root_dir);
 		goto out;
 	}
-
-	sb->s_root = d_alloc_root(inode);
-	if (!sb->s_root) {
-		fs_err(sdp, "can't get root dentry\n");
-		error = -ENOMEM;
-		iput(inode);
-	} else
-		sb->s_root->d_op = &gfs2_dops;
-	
+	sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir);
 out:
 	gfs2_glock_dq_uninit(&sb_gh);
-	return error;
+	return ret;
 }
 
 /**
@@ -372,6 +663,7 @@
 
 static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
+	struct inode *master = sdp->sd_master_dir->d_inode;
 	struct gfs2_holder ji_gh;
 	struct task_struct *p;
 	struct gfs2_inode *ip;
@@ -383,7 +675,7 @@
 		goto fail_recoverd;
 	}
 
-	sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex");
+	sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
 	if (IS_ERR(sdp->sd_jindex)) {
 		fs_err(sdp, "can't lookup journal index: %d\n", error);
 		return PTR_ERR(sdp->sd_jindex);
@@ -506,25 +798,17 @@
 {
 	int error = 0;
 	struct gfs2_inode *ip;
-	struct inode *inode;
+	struct inode *master = sdp->sd_master_dir->d_inode;
 
 	if (undo)
 		goto fail_qinode;
 
-	inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
-		fs_err(sdp, "can't read in master directory: %d\n", error);
-		goto fail;
-	}
-	sdp->sd_master_dir = inode;
-
 	error = init_journal(sdp, undo);
 	if (error)
-		goto fail_master;
+		goto fail;
 
 	/* Read in the master inode number inode */
-	sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
+	sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum");
 	if (IS_ERR(sdp->sd_inum_inode)) {
 		error = PTR_ERR(sdp->sd_inum_inode);
 		fs_err(sdp, "can't read in inum inode: %d\n", error);
@@ -533,7 +817,7 @@
 
 
 	/* Read in the master statfs inode */
-	sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
+	sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
 	if (IS_ERR(sdp->sd_statfs_inode)) {
 		error = PTR_ERR(sdp->sd_statfs_inode);
 		fs_err(sdp, "can't read in statfs inode: %d\n", error);
@@ -541,7 +825,7 @@
 	}
 
 	/* Read in the resource index inode */
-	sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
+	sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
 	if (IS_ERR(sdp->sd_rindex)) {
 		error = PTR_ERR(sdp->sd_rindex);
 		fs_err(sdp, "can't get resource index inode: %d\n", error);
@@ -552,7 +836,7 @@
 	sdp->sd_rindex_uptodate = 0;
 
 	/* Read in the quota inode */
-	sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
+	sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota");
 	if (IS_ERR(sdp->sd_quota_inode)) {
 		error = PTR_ERR(sdp->sd_quota_inode);
 		fs_err(sdp, "can't get quota file inode: %d\n", error);
@@ -571,8 +855,6 @@
 	iput(sdp->sd_inum_inode);
 fail_journal:
 	init_journal(sdp, UNDO);
-fail_master:
-	iput(sdp->sd_master_dir);
 fail:
 	return error;
 }
@@ -583,6 +865,7 @@
 	char buf[30];
 	int error = 0;
 	struct gfs2_inode *ip;
+	struct inode *master = sdp->sd_master_dir->d_inode;
 
 	if (sdp->sd_args.ar_spectator)
 		return 0;
@@ -590,7 +873,7 @@
 	if (undo)
 		goto fail_qc_gh;
 
-	pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
+	pn = gfs2_lookup_simple(master, "per_node");
 	if (IS_ERR(pn)) {
 		error = PTR_ERR(pn);
 		fs_err(sdp, "can't find per_node directory: %d\n", error);
@@ -800,7 +1083,11 @@
 		goto fail;
 	}
 
-	init_vfs(sb, SDF_NOATIME);
+	sb->s_magic = GFS2_MAGIC;
+	sb->s_op = &gfs2_super_ops;
+	sb->s_export_op = &gfs2_export_ops;
+	sb->s_time_gran = 1;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
 
 	/* Set up the buffer cache and fill in some fake block size values
 	   to allow us to read-in the on-disk superblock. */
@@ -828,7 +1115,7 @@
 	if (error)
 		goto fail_lm;
 
-	error = init_sb(sdp, silent, DO);
+	error = init_sb(sdp, silent);
 	if (error)
 		goto fail_locking;
 
@@ -869,7 +1156,11 @@
 fail_inodes:
 	init_inodes(sdp, UNDO);
 fail_sb:
-	init_sb(sdp, 0, UNDO);
+	if (sdp->sd_root_dir)
+		dput(sdp->sd_root_dir);
+	if (sdp->sd_master_dir)
+		dput(sdp->sd_master_dir);
+	sb->s_root = NULL;
 fail_locking:
 	init_locking(sdp, &mount_gh, UNDO);
 fail_lm:
@@ -887,151 +1178,63 @@
 }
 
 static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
-		const char *dev_name, void *data, struct vfsmount *mnt)
+		       const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
+}
+
+static struct super_block *get_gfs2_sb(const char *dev_name)
 {
 	struct super_block *sb;
-	struct gfs2_sbd *sdp;
-	int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
-	if (error)
-		goto out;
-	sb = mnt->mnt_sb;
-	sdp = sb->s_fs_info;
-	sdp->sd_gfs2mnt = mnt;
-out:
-	return error;
-}
-
-static int fill_super_meta(struct super_block *sb, struct super_block *new,
-			   void *data, int silent)
-{
-	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct inode *inode;
-	int error = 0;
-
-	new->s_fs_info = sdp;
-	sdp->sd_vfs_meta = sb;
-
-	init_vfs(new, SDF_NOATIME);
-
-        /* Get the master inode */
-	inode = igrab(sdp->sd_master_dir);
-
-	new->s_root = d_alloc_root(inode);
-	if (!new->s_root) {
-		fs_err(sdp, "can't get root dentry\n");
-		error = -ENOMEM;
-		iput(inode);
-	} else
-		new->s_root->d_op = &gfs2_dops;
-
-	return error;
-}
-
-static int set_bdev_super(struct super_block *s, void *data)
-{
-	s->s_bdev = data;
-	s->s_dev = s->s_bdev->bd_dev;
-	return 0;
-}
-
-static int test_bdev_super(struct super_block *s, void *data)
-{
-	return s->s_bdev == data;
-}
-
-static struct super_block* get_gfs2_sb(const char *dev_name)
-{
-	struct kstat stat;
 	struct nameidata nd;
-	struct super_block *sb = NULL, *s;
 	int error;
 
 	error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
 	if (error) {
-		printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n",
-		       dev_name);
-		goto out;
+		printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
+		       dev_name, error);
+		return NULL;
 	}
-	error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat);
-
-	list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) {
-		if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
-		    (S_ISDIR(stat.mode) &&
-		     s == nd.path.dentry->d_inode->i_sb)) {
-			sb = s;
-			goto free_nd;
-		}
-	}
-
-	printk(KERN_WARNING "GFS2: Unrecognized block device or "
-	       "mount point %s\n", dev_name);
-
-free_nd:
+	sb = nd.path.dentry->d_inode->i_sb;
+	if (sb && (sb->s_type == &gfs2_fs_type))
+		atomic_inc(&sb->s_active);
+	else
+		sb = NULL;
 	path_put(&nd.path);
-out:
 	return sb;
 }
 
 static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 			    const char *dev_name, void *data, struct vfsmount *mnt)
 {
-	int error = 0;
-	struct super_block *sb = NULL, *new;
+	struct super_block *sb = NULL;
 	struct gfs2_sbd *sdp;
 
 	sb = get_gfs2_sb(dev_name);
 	if (!sb) {
 		printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
-		error = -ENOENT;
-		goto error;
+		return -ENOENT;
 	}
 	sdp = sb->s_fs_info;
-	if (sdp->sd_vfs_meta) {
-		printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
-		error = -EBUSY;
-		goto error;
-	}
-	down(&sb->s_bdev->bd_mount_sem);
-	new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
-	up(&sb->s_bdev->bd_mount_sem);
-	if (IS_ERR(new)) {
-		error = PTR_ERR(new);
-		goto error;
-	}
-	new->s_flags = flags;
-	strlcpy(new->s_id, sb->s_id, sizeof(new->s_id));
-	sb_set_blocksize(new, sb->s_blocksize);
-	error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&new->s_umount);
-		deactivate_super(new);
-		goto error;
-	}
-
-	new->s_flags |= MS_ACTIVE;
-
-	/* Grab a reference to the gfs2 mount point */
-	atomic_inc(&sdp->sd_gfs2mnt->mnt_count);
-	return simple_set_mnt(mnt, new);
-error:
-	return error;
+	mnt->mnt_sb = sb;
+	mnt->mnt_root = dget(sdp->sd_master_dir);
+	return 0;
 }
 
 static void gfs2_kill_sb(struct super_block *sb)
 {
-	if (sb->s_fs_info) {
-		gfs2_delete_debugfs_file(sb->s_fs_info);
-		gfs2_meta_syncfs(sb->s_fs_info);
-	}
-	kill_block_super(sb);
-}
-
-static void gfs2_kill_sb_meta(struct super_block *sb)
-{
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	generic_shutdown_super(sb);
-	sdp->sd_vfs_meta = NULL;
-	atomic_dec(&sdp->sd_gfs2mnt->mnt_count);
+	if (sdp) {
+		gfs2_meta_syncfs(sdp);
+		dput(sdp->sd_root_dir);
+		dput(sdp->sd_master_dir);
+		sdp->sd_root_dir = NULL;
+		sdp->sd_master_dir = NULL;
+	}
+	shrink_dcache_sb(sb);
+	kill_block_super(sb);
+	if (sdp)
+		gfs2_delete_debugfs_file(sdp);
 }
 
 struct file_system_type gfs2_fs_type = {
@@ -1046,7 +1249,6 @@
 	.name = "gfs2meta",
 	.fs_flags = FS_REQUIRES_DEV,
 	.get_sb = gfs2_get_sb_meta,
-	.kill_sb = gfs2_kill_sb_meta,
 	.owner = THIS_MODULE,
 };
 
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index e2c62f7..534e1e2 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -159,9 +159,13 @@
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
-	error = gfs2_glock_nq_m(2, ghs);
+	error = gfs2_glock_nq(ghs); /* parent */
 	if (error)
-		goto out;
+		goto out_parent;
+
+	error = gfs2_glock_nq(ghs + 1); /* child */
+	if (error)
+		goto out_child;
 
 	error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
 	if (error)
@@ -245,8 +249,10 @@
 	if (alloc_required)
 		gfs2_alloc_put(dip);
 out_gunlock:
-	gfs2_glock_dq_m(2, ghs);
-out:
+	gfs2_glock_dq(ghs + 1);
+out_child:
+	gfs2_glock_dq(ghs);
+out_parent:
 	gfs2_holder_uninit(ghs);
 	gfs2_holder_uninit(ghs + 1);
 	if (!error) {
@@ -302,7 +308,7 @@
 
 	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
 	if (error)
-		goto out_rgrp;
+		goto out_gunlock;
 
 	error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
 	if (error)
@@ -316,6 +322,7 @@
 
 out_end_trans:
 	gfs2_trans_end(sdp);
+out_gunlock:
 	gfs2_glock_dq(ghs + 2);
 out_rgrp:
 	gfs2_holder_uninit(ghs + 2);
@@ -485,7 +492,6 @@
 	struct gfs2_holder ri_gh;
 	int error;
 
-
 	error = gfs2_rindex_hold(sdp, &ri_gh);
 	if (error)
 		return error;
@@ -495,9 +501,17 @@
 	rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
 	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
 
-	error = gfs2_glock_nq_m(3, ghs);
+	error = gfs2_glock_nq(ghs); /* parent */
 	if (error)
-		goto out;
+		goto out_parent;
+
+	error = gfs2_glock_nq(ghs + 1); /* child */
+	if (error)
+		goto out_child;
+
+	error = gfs2_glock_nq(ghs + 2); /* rgrp */
+	if (error)
+		goto out_rgrp;
 
 	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
 	if (error)
@@ -523,11 +537,15 @@
 	gfs2_trans_end(sdp);
 
 out_gunlock:
-	gfs2_glock_dq_m(3, ghs);
-out:
-	gfs2_holder_uninit(ghs);
-	gfs2_holder_uninit(ghs + 1);
+	gfs2_glock_dq(ghs + 2);
+out_rgrp:
 	gfs2_holder_uninit(ghs + 2);
+	gfs2_glock_dq(ghs + 1);
+out_child:
+	gfs2_holder_uninit(ghs + 1);
+	gfs2_glock_dq(ghs);
+out_parent:
+	gfs2_holder_uninit(ghs);
 	gfs2_glock_dq_uninit(&ri_gh);
 	return error;
 }
@@ -571,6 +589,54 @@
 	return 0;
 }
 
+/*
+ * gfs2_ok_to_move - check if it's ok to move a directory to another directory
+ * @this: move this
+ * @to: to here
+ *
+ * Follow @to back to the root and make sure we don't encounter @this
+ * Assumes we already hold the rename lock.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
+{
+	struct inode *dir = &to->i_inode;
+	struct super_block *sb = dir->i_sb;
+	struct inode *tmp;
+	struct qstr dotdot;
+	int error = 0;
+
+	gfs2_str2qstr(&dotdot, "..");
+
+	igrab(dir);
+
+	for (;;) {
+		if (dir == &this->i_inode) {
+			error = -EINVAL;
+			break;
+		}
+		if (dir == sb->s_root->d_inode) {
+			error = 0;
+			break;
+		}
+
+		tmp = gfs2_lookupi(dir, &dotdot, 1);
+		if (IS_ERR(tmp)) {
+			error = PTR_ERR(tmp);
+			break;
+		}
+
+		iput(dir);
+		dir = tmp;
+	}
+
+	iput(dir);
+
+	return error;
+}
+
 /**
  * gfs2_rename - Rename a file
  * @odir: Parent directory of old file name
@@ -589,7 +655,7 @@
 	struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
 	struct gfs2_inode *nip = NULL;
 	struct gfs2_sbd *sdp = GFS2_SB(odir);
-	struct gfs2_holder ghs[5], r_gh;
+	struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
 	struct gfs2_rgrpd *nrgd;
 	unsigned int num_gh;
 	int dir_rename = 0;
@@ -603,19 +669,20 @@
 			return 0;
 	}
 
-	/* Make sure we aren't trying to move a dirctory into it's subdir */
 
-	if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) {
-		dir_rename = 1;
-
-		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0,
-					   &r_gh);
+	if (odip != ndip) {
+		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
+					   0, &r_gh);
 		if (error)
 			goto out;
 
-		error = gfs2_ok_to_move(ip, ndip);
-		if (error)
-			goto out_gunlock_r;
+		if (S_ISDIR(ip->i_inode.i_mode)) {
+			dir_rename = 1;
+			/* don't move a dirctory into it's subdir */
+			error = gfs2_ok_to_move(ip, ndip);
+			if (error)
+				goto out_gunlock_r;
+		}
 	}
 
 	num_gh = 1;
@@ -639,9 +706,11 @@
 			gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
 	}
 
-	error = gfs2_glock_nq_m(num_gh, ghs);
-	if (error)
-		goto out_uninit;
+	for (x = 0; x < num_gh; x++) {
+		error = gfs2_glock_nq(ghs + x);
+		if (error)
+			goto out_gunlock;
+	}
 
 	/* Check out the old directory */
 
@@ -804,12 +873,12 @@
 	if (alloc_required)
 		gfs2_alloc_put(ndip);
 out_gunlock:
-	gfs2_glock_dq_m(num_gh, ghs);
-out_uninit:
-	for (x = 0; x < num_gh; x++)
+	while (x--) {
+		gfs2_glock_dq(ghs + x);
 		gfs2_holder_uninit(ghs + x);
+	}
 out_gunlock_r:
-	if (dir_rename)
+	if (r_gh.gh_gl)
 		gfs2_glock_dq_uninit(&r_gh);
 out:
 	return error;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index f66ea0f..d5355d9 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -20,6 +20,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
+#include <linux/time.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -38,6 +39,7 @@
 #include "dir.h"
 #include "eattr.h"
 #include "bmap.h"
+#include "meta_io.h"
 
 /**
  * gfs2_write_inode - Make sure the inode is stable on the disk
@@ -50,16 +52,74 @@
 static int gfs2_write_inode(struct inode *inode, int sync)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct gfs2_holder gh;
+	struct buffer_head *bh;
+	struct timespec atime;
+	struct gfs2_dinode *di;
+	int ret = 0;
 
-	/* Check this is a "normal" inode */
-	if (test_bit(GIF_USER, &ip->i_flags)) {
-		if (current->flags & PF_MEMALLOC)
-			return 0;
-		if (sync)
-			gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+	/* Check this is a "normal" inode, etc */
+	if (!test_bit(GIF_USER, &ip->i_flags) ||
+	    (current->flags & PF_MEMALLOC))
+		return 0;
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	if (ret)
+		goto do_flush;
+	ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	if (ret)
+		goto do_unlock;
+	ret = gfs2_meta_inode_buffer(ip, &bh);
+	if (ret == 0) {
+		di = (struct gfs2_dinode *)bh->b_data;
+		atime.tv_sec = be64_to_cpu(di->di_atime);
+		atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
+		if (timespec_compare(&inode->i_atime, &atime) > 0) {
+			gfs2_trans_add_bh(ip->i_gl, bh, 1);
+			gfs2_dinode_out(ip, bh->b_data);
+		}
+		brelse(bh);
 	}
+	gfs2_trans_end(sdp);
+do_unlock:
+	gfs2_glock_dq_uninit(&gh);
+do_flush:
+	if (sync != 0)
+		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+	return ret;
+}
 
-	return 0;
+/**
+ * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+{
+	struct gfs2_holder t_gh;
+	int error;
+
+	gfs2_quota_sync(sdp);
+	gfs2_statfs_sync(sdp);
+
+	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
+				   &t_gh);
+	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+		return error;
+
+	gfs2_meta_syncfs(sdp);
+	gfs2_log_shutdown(sdp);
+
+	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+	if (t_gh.gh_gl)
+		gfs2_glock_dq_uninit(&t_gh);
+
+	gfs2_quota_cleanup(sdp);
+
+	return error;
 }
 
 /**
@@ -73,12 +133,6 @@
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	int error;
 
-	if (!sdp)
-		return;
-
-	if (!strncmp(sb->s_type->name, "gfs2meta", 8))
-		return; /* Nothing to do */
-
 	/*  Unfreeze the filesystem, if we need to  */
 
 	mutex_lock(&sdp->sd_freeze_lock);
@@ -101,7 +155,6 @@
 
 	/*  Release stuff  */
 
-	iput(sdp->sd_master_dir);
 	iput(sdp->sd_jindex);
 	iput(sdp->sd_inum_inode);
 	iput(sdp->sd_statfs_inode);
@@ -152,6 +205,7 @@
  *
  * Flushes the log to disk.
  */
+
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
 	sb->s_dirt = 0;
@@ -270,14 +324,6 @@
 		}
 	}
 
-	if (*flags & (MS_NOATIME | MS_NODIRATIME))
-		set_bit(SDF_NOATIME, &sdp->sd_flags);
-	else
-		clear_bit(SDF_NOATIME, &sdp->sd_flags);
-
-	/* Don't let the VFS update atimes.  GFS2 handles this itself. */
-	*flags |= MS_NOATIME | MS_NODIRATIME;
-
 	return error;
 }
 
@@ -295,6 +341,7 @@
  * inode's blocks, or alternatively pass the baton on to another
  * node for later deallocation.
  */
+
 static void gfs2_drop_inode(struct inode *inode)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -333,6 +380,16 @@
 	}
 }
 
+static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
+{
+	do {
+		if (d1 == d2)
+			return 1;
+		d1 = d1->d_parent;
+	} while (!IS_ROOT(d1));
+	return 0;
+}
+
 /**
  * gfs2_show_options - Show mount options for /proc/mounts
  * @s: seq_file structure
@@ -346,6 +403,8 @@
 	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
 	struct gfs2_args *args = &sdp->sd_args;
 
+	if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
+		seq_printf(s, ",meta");
 	if (args->ar_lockproto[0])
 		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
 	if (args->ar_locktable[0])
@@ -414,6 +473,7 @@
  * conversion on the iopen lock, but we can change that later. This
  * is safe, just less efficient.
  */
+
 static void gfs2_delete_inode(struct inode *inode)
 {
 	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
@@ -478,8 +538,6 @@
 	clear_inode(inode);
 }
 
-
-
 static struct inode *gfs2_alloc_inode(struct super_block *sb)
 {
 	struct gfs2_inode *ip;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ca83199..c3ba3d9 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -33,313 +33,6 @@
 #include "trans.h"
 #include "util.h"
 
-static const u32 gfs2_old_fs_formats[] = {
-        0
-};
-
-static const u32 gfs2_old_multihost_formats[] = {
-        0
-};
-
-/**
- * gfs2_tune_init - Fill a gfs2_tune structure with default values
- * @gt: tune
- *
- */
-
-void gfs2_tune_init(struct gfs2_tune *gt)
-{
-	spin_lock_init(&gt->gt_spin);
-
-	gt->gt_demote_secs = 300;
-	gt->gt_incore_log_blocks = 1024;
-	gt->gt_log_flush_secs = 60;
-	gt->gt_recoverd_secs = 60;
-	gt->gt_logd_secs = 1;
-	gt->gt_quotad_secs = 5;
-	gt->gt_quota_simul_sync = 64;
-	gt->gt_quota_warn_period = 10;
-	gt->gt_quota_scale_num = 1;
-	gt->gt_quota_scale_den = 1;
-	gt->gt_quota_cache_secs = 300;
-	gt->gt_quota_quantum = 60;
-	gt->gt_atime_quantum = 3600;
-	gt->gt_new_files_jdata = 0;
-	gt->gt_max_readahead = 1 << 18;
-	gt->gt_stall_secs = 600;
-	gt->gt_complain_secs = 10;
-	gt->gt_statfs_quantum = 30;
-	gt->gt_statfs_slow = 0;
-}
-
-/**
- * gfs2_check_sb - Check superblock
- * @sdp: the filesystem
- * @sb: The superblock
- * @silent: Don't print a message if the check fails
- *
- * Checks the version code of the FS is one that we understand how to
- * read and that the sizes of the various on-disk structures have not
- * changed.
- */
-
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
-{
-	unsigned int x;
-
-	if (sb->sb_magic != GFS2_MAGIC ||
-	    sb->sb_type != GFS2_METATYPE_SB) {
-		if (!silent)
-			printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
-		return -EINVAL;
-	}
-
-	/*  If format numbers match exactly, we're done.  */
-
-	if (sb->sb_fs_format == GFS2_FORMAT_FS &&
-	    sb->sb_multihost_format == GFS2_FORMAT_MULTI)
-		return 0;
-
-	if (sb->sb_fs_format != GFS2_FORMAT_FS) {
-		for (x = 0; gfs2_old_fs_formats[x]; x++)
-			if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
-				break;
-
-		if (!gfs2_old_fs_formats[x]) {
-			printk(KERN_WARNING
-			       "GFS2: code version (%u, %u) is incompatible "
-			       "with ondisk format (%u, %u)\n",
-			       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
-			       sb->sb_fs_format, sb->sb_multihost_format);
-			printk(KERN_WARNING
-			       "GFS2: I don't know how to upgrade this FS\n");
-			return -EINVAL;
-		}
-	}
-
-	if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
-		for (x = 0; gfs2_old_multihost_formats[x]; x++)
-			if (gfs2_old_multihost_formats[x] ==
-			    sb->sb_multihost_format)
-				break;
-
-		if (!gfs2_old_multihost_formats[x]) {
-			printk(KERN_WARNING
-			       "GFS2: code version (%u, %u) is incompatible "
-			       "with ondisk format (%u, %u)\n",
-			       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
-			       sb->sb_fs_format, sb->sb_multihost_format);
-			printk(KERN_WARNING
-			       "GFS2: I don't know how to upgrade this FS\n");
-			return -EINVAL;
-		}
-	}
-
-	if (!sdp->sd_args.ar_upgrade) {
-		printk(KERN_WARNING
-		       "GFS2: code version (%u, %u) is incompatible "
-		       "with ondisk format (%u, %u)\n",
-		       GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
-		       sb->sb_fs_format, sb->sb_multihost_format);
-		printk(KERN_INFO
-		       "GFS2: Use the \"upgrade\" mount option to upgrade "
-		       "the FS\n");
-		printk(KERN_INFO "GFS2: See the manual for more details\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-
-static void end_bio_io_page(struct bio *bio, int error)
-{
-	struct page *page = bio->bi_private;
-
-	if (!error)
-		SetPageUptodate(page);
-	else
-		printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
-	unlock_page(page);
-}
-
-static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
-{
-	const struct gfs2_sb *str = buf;
-
-	sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
-	sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
-	sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
-	sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
-	sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
-	sb->sb_bsize = be32_to_cpu(str->sb_bsize);
-	sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
-	sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
-	sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
-	sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
-	sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
-
-	memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
-	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-}
-
-/**
- * gfs2_read_super - Read the gfs2 super block from disk
- * @sdp: The GFS2 super block
- * @sector: The location of the super block
- * @error: The error code to return
- *
- * This uses the bio functions to read the super block from disk
- * because we want to be 100% sure that we never read cached data.
- * A super block is read twice only during each GFS2 mount and is
- * never written to by the filesystem. The first time its read no
- * locks are held, and the only details which are looked at are those
- * relating to the locking protocol. Once locking is up and working,
- * the sb is read again under the lock to establish the location of
- * the master directory (contains pointers to journals etc) and the
- * root directory.
- *
- * Returns: 0 on success or error
- */
-
-int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
-{
-	struct super_block *sb = sdp->sd_vfs;
-	struct gfs2_sb *p;
-	struct page *page;
-	struct bio *bio;
-
-	page = alloc_page(GFP_NOFS);
-	if (unlikely(!page))
-		return -ENOBUFS;
-
-	ClearPageUptodate(page);
-	ClearPageDirty(page);
-	lock_page(page);
-
-	bio = bio_alloc(GFP_NOFS, 1);
-	if (unlikely(!bio)) {
-		__free_page(page);
-		return -ENOBUFS;
-	}
-
-	bio->bi_sector = sector * (sb->s_blocksize >> 9);
-	bio->bi_bdev = sb->s_bdev;
-	bio_add_page(bio, page, PAGE_SIZE, 0);
-
-	bio->bi_end_io = end_bio_io_page;
-	bio->bi_private = page;
-	submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
-	wait_on_page_locked(page);
-	bio_put(bio);
-	if (!PageUptodate(page)) {
-		__free_page(page);
-		return -EIO;
-	}
-	p = kmap(page);
-	gfs2_sb_in(&sdp->sd_sb, p);
-	kunmap(page);
-	__free_page(page);
-	return 0;
-}
-
-/**
- * gfs2_read_sb - Read super block
- * @sdp: The GFS2 superblock
- * @gl: the glock for the superblock (assumed to be held)
- * @silent: Don't print message if mount fails
- *
- */
-
-int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
-{
-	u32 hash_blocks, ind_blocks, leaf_blocks;
-	u32 tmp_blocks;
-	unsigned int x;
-	int error;
-
-	error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-	if (error) {
-		if (!silent)
-			fs_err(sdp, "can't read superblock\n");
-		return error;
-	}
-
-	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
-	if (error)
-		return error;
-
-	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
-			       GFS2_BASIC_BLOCK_SHIFT;
-	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
-	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
-			  sizeof(struct gfs2_dinode)) / sizeof(u64);
-	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
-			  sizeof(struct gfs2_meta_header)) / sizeof(u64);
-	sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
-	sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
-	sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
-	sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
-	sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
-				sizeof(struct gfs2_meta_header)) /
-			        sizeof(struct gfs2_quota_change);
-
-	/* Compute maximum reservation required to add a entry to a directory */
-
-	hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
-			     sdp->sd_jbsize);
-
-	ind_blocks = 0;
-	for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
-		tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
-		ind_blocks += tmp_blocks;
-	}
-
-	leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
-
-	sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
-
-	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
-				sizeof(struct gfs2_dinode);
-	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
-	for (x = 2;; x++) {
-		u64 space, d;
-		u32 m;
-
-		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
-		d = space;
-		m = do_div(d, sdp->sd_inptrs);
-
-		if (d != sdp->sd_heightsize[x - 1] || m)
-			break;
-		sdp->sd_heightsize[x] = space;
-	}
-	sdp->sd_max_height = x;
-	sdp->sd_heightsize[x] = ~0;
-	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
-
-	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
-				 sizeof(struct gfs2_dinode);
-	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
-	for (x = 2;; x++) {
-		u64 space, d;
-		u32 m;
-
-		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
-		d = space;
-		m = do_div(d, sdp->sd_inptrs);
-
-		if (d != sdp->sd_jheightsize[x - 1] || m)
-			break;
-		sdp->sd_jheightsize[x] = space;
-	}
-	sdp->sd_max_jheight = x;
-	sdp->sd_jheightsize[x] = ~0;
-	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
-
-	return 0;
-}
-
 /**
  * gfs2_jindex_hold - Grab a lock on the jindex
  * @sdp: The GFS2 superblock
@@ -581,39 +274,6 @@
 	return error;
 }
 
-/**
- * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
- * @sdp: the filesystem
- *
- * Returns: errno
- */
-
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
-{
-	struct gfs2_holder t_gh;
-	int error;
-
-	gfs2_quota_sync(sdp);
-	gfs2_statfs_sync(sdp);
-
-	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
-				   &t_gh);
-	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-		return error;
-
-	gfs2_meta_syncfs(sdp);
-	gfs2_log_shutdown(sdp);
-
-	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-
-	if (t_gh.gh_gl)
-		gfs2_glock_dq_uninit(&t_gh);
-
-	gfs2_quota_cleanup(sdp);
-
-	return error;
-}
-
 static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
 {
 	const struct gfs2_statfs_change *str = buf;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 44361ec..50a4c9b 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -12,11 +12,6 @@
 
 #include "incore.h"
 
-void gfs2_tune_init(struct gfs2_tune *gt);
-
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
-int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
 void gfs2_lm_unmount(struct gfs2_sbd *sdp);
 
 static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
@@ -40,7 +35,6 @@
 			      struct gfs2_inode **ipp);
 
 int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
 
 int gfs2_statfs_init(struct gfs2_sbd *sdp);
 void gfs2_statfs_change(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7484655..7e1879f 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -269,14 +269,6 @@
 ARGS_ATTR(suiddir,         "%d\n");
 ARGS_ATTR(data,            "%d\n");
 
-/* one oddball doesn't fit the macro mold */
-static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-			!!test_bit(SDF_NOATIME, &sdp->sd_flags));
-}
-static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
-
 static struct attribute *args_attrs[] = {
 	&args_attr_lockproto.attr,
 	&args_attr_locktable.attr,
@@ -292,7 +284,6 @@
 	&args_attr_quota.attr,
 	&args_attr_suiddir.attr,
 	&args_attr_data.attr,
-	&args_attr_noatime.attr,
 	NULL,
 };
 
@@ -407,7 +398,6 @@
 TUNE_ATTR(log_flush_secs, 0);
 TUNE_ATTR(quota_warn_period, 0);
 TUNE_ATTR(quota_quantum, 0);
-TUNE_ATTR(atime_quantum, 0);
 TUNE_ATTR(max_readahead, 0);
 TUNE_ATTR(complain_secs, 0);
 TUNE_ATTR(statfs_slow, 0);
@@ -427,7 +417,6 @@
 	&tune_attr_log_flush_secs.attr,
 	&tune_attr_quota_warn_period.attr,
 	&tune_attr_quota_quantum.attr,
-	&tune_attr_atime_quantum.attr,
 	&tune_attr_max_readahead.attr,
 	&tune_attr_complain_secs.attr,
 	&tune_attr_statfs_slow.attr,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ecc3330..7408227 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -120,22 +120,21 @@
  * a pointer to that same buffer (for convenience).
  */
 
-char *disk_name(struct gendisk *hd, int part, char *buf)
+char *disk_name(struct gendisk *hd, int partno, char *buf)
 {
-	if (!part)
+	if (!partno)
 		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
 	else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
-		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part);
+		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
 	else
-		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part);
+		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
 
 	return buf;
 }
 
 const char *bdevname(struct block_device *bdev, char *buf)
 {
-	int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor;
-	return disk_name(bdev->bd_disk, part, buf);
+	return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
 }
 
 EXPORT_SYMBOL(bdevname);
@@ -169,7 +168,7 @@
 	if (isdigit(state->name[strlen(state->name)-1]))
 		sprintf(state->name, "p");
 
-	state->limit = hd->minors;
+	state->limit = disk_max_parts(hd);
 	i = res = err = 0;
 	while (!res && check_part[i]) {
 		memset(&state->parts, 0, sizeof(state->parts));
@@ -204,21 +203,22 @@
 	return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
 }
 
-static ssize_t part_size_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+ssize_t part_size_show(struct device *dev,
+		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
 	return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
 }
 
-static ssize_t part_stat_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+ssize_t part_stat_show(struct device *dev,
+		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	int cpu;
 
-	preempt_disable();
-	part_round_stats(p);
-	preempt_enable();
+	cpu = part_stat_lock();
+	part_round_stats(cpu, p);
+	part_stat_unlock();
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
@@ -238,17 +238,17 @@
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
-static ssize_t part_fail_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+ssize_t part_fail_show(struct device *dev,
+		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
 
 	return sprintf(buf, "%d\n", p->make_it_fail);
 }
 
-static ssize_t part_fail_store(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t count)
+ssize_t part_fail_store(struct device *dev,
+			struct device_attribute *attr,
+			const char *buf, size_t count)
 {
 	struct hd_struct *p = dev_to_part(dev);
 	int i;
@@ -300,40 +300,34 @@
 	.release	= part_release,
 };
 
-static inline void partition_sysfs_add_subdir(struct hd_struct *p)
+static void delete_partition_rcu_cb(struct rcu_head *head)
 {
-	struct kobject *k;
+	struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
 
-	k = kobject_get(&p->dev.kobj);
-	p->holder_dir = kobject_create_and_add("holders", k);
-	kobject_put(k);
+	part->start_sect = 0;
+	part->nr_sects = 0;
+	part_stat_set_all(part, 0);
+	put_device(part_to_dev(part));
 }
 
-static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
+void delete_partition(struct gendisk *disk, int partno)
 {
-	struct kobject *k;
+	struct disk_part_tbl *ptbl = disk->part_tbl;
+	struct hd_struct *part;
 
-	k = kobject_get(&disk->dev.kobj);
-	disk->holder_dir = kobject_create_and_add("holders", k);
-	disk->slave_dir = kobject_create_and_add("slaves", k);
-	kobject_put(k);
-}
-
-void delete_partition(struct gendisk *disk, int part)
-{
-	struct hd_struct *p = disk->part[part-1];
-
-	if (!p)
+	if (partno >= ptbl->len)
 		return;
-	if (!p->nr_sects)
+
+	part = ptbl->part[partno];
+	if (!part)
 		return;
-	disk->part[part-1] = NULL;
-	p->start_sect = 0;
-	p->nr_sects = 0;
-	part_stat_set_all(p, 0);
-	kobject_put(p->holder_dir);
-	device_del(&p->dev);
-	put_device(&p->dev);
+
+	blk_free_devt(part_devt(part));
+	rcu_assign_pointer(ptbl->part[partno], NULL);
+	kobject_put(part->holder_dir);
+	device_del(part_to_dev(part));
+
+	call_rcu(&part->rcu_head, delete_partition_rcu_cb);
 }
 
 static ssize_t whole_disk_show(struct device *dev,
@@ -344,102 +338,132 @@
 static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
 		   whole_disk_show, NULL);
 
-int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int partno,
+		  sector_t start, sector_t len, int flags)
 {
 	struct hd_struct *p;
+	dev_t devt = MKDEV(0, 0);
+	struct device *ddev = disk_to_dev(disk);
+	struct device *pdev;
+	struct disk_part_tbl *ptbl;
+	const char *dname;
 	int err;
 
+	err = disk_expand_part_tbl(disk, partno);
+	if (err)
+		return err;
+	ptbl = disk->part_tbl;
+
+	if (ptbl->part[partno])
+		return -EBUSY;
+
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
 	if (!init_part_stats(p)) {
 		err = -ENOMEM;
-		goto out0;
+		goto out_free;
 	}
+	pdev = part_to_dev(p);
+
 	p->start_sect = start;
 	p->nr_sects = len;
-	p->partno = part;
-	p->policy = disk->policy;
+	p->partno = partno;
+	p->policy = get_disk_ro(disk);
 
-	if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1]))
-		snprintf(p->dev.bus_id, BUS_ID_SIZE,
-		"%sp%d", disk->dev.bus_id, part);
+	dname = dev_name(ddev);
+	if (isdigit(dname[strlen(dname) - 1]))
+		snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno);
 	else
-		snprintf(p->dev.bus_id, BUS_ID_SIZE,
-			 "%s%d", disk->dev.bus_id, part);
+		snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno);
 
-	device_initialize(&p->dev);
-	p->dev.devt = MKDEV(disk->major, disk->first_minor + part);
-	p->dev.class = &block_class;
-	p->dev.type = &part_type;
-	p->dev.parent = &disk->dev;
-	disk->part[part-1] = p;
+	device_initialize(pdev);
+	pdev->class = &block_class;
+	pdev->type = &part_type;
+	pdev->parent = ddev;
+
+	err = blk_alloc_devt(p, &devt);
+	if (err)
+		goto out_free;
+	pdev->devt = devt;
 
 	/* delay uevent until 'holders' subdir is created */
-	p->dev.uevent_suppress = 1;
-	err = device_add(&p->dev);
+	pdev->uevent_suppress = 1;
+	err = device_add(pdev);
 	if (err)
-		goto out1;
-	partition_sysfs_add_subdir(p);
-	p->dev.uevent_suppress = 0;
+		goto out_put;
+
+	err = -ENOMEM;
+	p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
+	if (!p->holder_dir)
+		goto out_del;
+
+	pdev->uevent_suppress = 0;
 	if (flags & ADDPART_FLAG_WHOLEDISK) {
-		err = device_create_file(&p->dev, &dev_attr_whole_disk);
+		err = device_create_file(pdev, &dev_attr_whole_disk);
 		if (err)
-			goto out2;
+			goto out_del;
 	}
 
+	/* everything is up and running, commence */
+	INIT_RCU_HEAD(&p->rcu_head);
+	rcu_assign_pointer(ptbl->part[partno], p);
+
 	/* suppress uevent if the disk supresses it */
-	if (!disk->dev.uevent_suppress)
-		kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+	if (!ddev->uevent_suppress)
+		kobject_uevent(&pdev->kobj, KOBJ_ADD);
 
 	return 0;
 
-out2:
-	device_del(&p->dev);
-out1:
-	put_device(&p->dev);
-	free_part_stats(p);
-out0:
+out_free:
 	kfree(p);
 	return err;
+out_del:
+	kobject_put(p->holder_dir);
+	device_del(pdev);
+out_put:
+	put_device(pdev);
+	blk_free_devt(devt);
+	return err;
 }
 
 /* Not exported, helper to add_disk(). */
 void register_disk(struct gendisk *disk)
 {
+	struct device *ddev = disk_to_dev(disk);
 	struct block_device *bdev;
+	struct disk_part_iter piter;
+	struct hd_struct *part;
 	char *s;
-	int i;
-	struct hd_struct *p;
 	int err;
 
-	disk->dev.parent = disk->driverfs_dev;
-	disk->dev.devt = MKDEV(disk->major, disk->first_minor);
+	ddev->parent = disk->driverfs_dev;
 
-	strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
+	strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE);
 	/* ewww... some of these buggers have / in the name... */
-	s = strchr(disk->dev.bus_id, '/');
+	s = strchr(ddev->bus_id, '/');
 	if (s)
 		*s = '!';
 
 	/* delay uevents, until we scanned partition table */
-	disk->dev.uevent_suppress = 1;
+	ddev->uevent_suppress = 1;
 
-	if (device_add(&disk->dev))
+	if (device_add(ddev))
 		return;
 #ifndef CONFIG_SYSFS_DEPRECATED
-	err = sysfs_create_link(block_depr, &disk->dev.kobj,
-				kobject_name(&disk->dev.kobj));
+	err = sysfs_create_link(block_depr, &ddev->kobj,
+				kobject_name(&ddev->kobj));
 	if (err) {
-		device_del(&disk->dev);
+		device_del(ddev);
 		return;
 	}
 #endif
-	disk_sysfs_add_subdirs(disk);
+	disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
+	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 
 	/* No minors to use for partitions */
-	if (disk->minors == 1)
+	if (!disk_partitionable(disk))
 		goto exit;
 
 	/* No such device (e.g., media were just removed) */
@@ -458,41 +482,57 @@
 
 exit:
 	/* announce disk after possible partitions are created */
-	disk->dev.uevent_suppress = 0;
-	kobject_uevent(&disk->dev.kobj, KOBJ_ADD);
+	ddev->uevent_suppress = 0;
+	kobject_uevent(&ddev->kobj, KOBJ_ADD);
 
 	/* announce possible partitions */
-	for (i = 1; i < disk->minors; i++) {
-		p = disk->part[i-1];
-		if (!p || !p->nr_sects)
-			continue;
-		kobject_uevent(&p->dev.kobj, KOBJ_ADD);
-	}
+	disk_part_iter_init(&piter, disk, 0);
+	while ((part = disk_part_iter_next(&piter)))
+		kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
+	disk_part_iter_exit(&piter);
 }
 
 int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 {
+	struct disk_part_iter piter;
+	struct hd_struct *part;
 	struct parsed_partitions *state;
-	int p, res;
+	int p, highest, res;
 
 	if (bdev->bd_part_count)
 		return -EBUSY;
 	res = invalidate_partition(disk, 0);
 	if (res)
 		return res;
-	bdev->bd_invalidated = 0;
-	for (p = 1; p < disk->minors; p++)
-		delete_partition(disk, p);
+
+	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
+	while ((part = disk_part_iter_next(&piter)))
+		delete_partition(disk, part->partno);
+	disk_part_iter_exit(&piter);
+
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
+	check_disk_size_change(disk, bdev);
+	bdev->bd_invalidated = 0;
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
 	if (IS_ERR(state))	/* I/O error reading the partition table */
 		return -EIO;
 
 	/* tell userspace that the media / partition table may have changed */
-	kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE);
+	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 
+	/* Detect the highest partition number and preallocate
+	 * disk->part_tbl.  This is an optimization and not strictly
+	 * necessary.
+	 */
+	for (p = 1, highest = 0; p < state->limit; p++)
+		if (state->parts[p].size)
+			highest = p;
+
+	disk_expand_part_tbl(disk, highest);
+
+	/* add partitions */
 	for (p = 1; p < state->limit; p++) {
 		sector_t size = state->parts[p].size;
 		sector_t from = state->parts[p].from;
@@ -541,25 +581,31 @@
 
 void del_gendisk(struct gendisk *disk)
 {
-	int p;
+	struct disk_part_iter piter;
+	struct hd_struct *part;
 
 	/* invalidate stuff */
-	for (p = disk->minors - 1; p > 0; p--) {
-		invalidate_partition(disk, p);
-		delete_partition(disk, p);
+	disk_part_iter_init(&piter, disk,
+			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
+	while ((part = disk_part_iter_next(&piter))) {
+		invalidate_partition(disk, part->partno);
+		delete_partition(disk, part->partno);
 	}
+	disk_part_iter_exit(&piter);
+
 	invalidate_partition(disk, 0);
-	disk->capacity = 0;
+	blk_free_devt(disk_to_dev(disk)->devt);
+	set_capacity(disk, 0);
 	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
-	disk_stat_set_all(disk, 0);
-	disk->stamp = 0;
+	part_stat_set_all(&disk->part0, 0);
+	disk->part0.stamp = 0;
 
-	kobject_put(disk->holder_dir);
+	kobject_put(disk->part0.holder_dir);
 	kobject_put(disk->slave_dir);
 	disk->driverfs_dev = NULL;
 #ifndef CONFIG_SYSFS_DEPRECATED
-	sysfs_remove_link(block_depr, disk->dev.bus_id);
+	sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 #endif
-	device_del(&disk->dev);
+	device_del(disk_to_dev(disk));
 }
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 17ae8ec..98dbe1a 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -5,15 +5,13 @@
  * add_gd_partition adds a partitions details to the devices partition
  * description.
  */
-enum { MAX_PART = 256 };
-
 struct parsed_partitions {
 	char name[BDEVNAME_SIZE];
 	struct {
 		sector_t from;
 		sector_t size;
 		int flags;
-	} parts[MAX_PART];
+	} parts[DISK_MAX_PARTS];
 	int next;
 	int limit;
 };
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 986061a..36d5fcd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1001,12 +1001,13 @@
 	 * We can get an EOPNOTSUPP to ordered writes.  Here we clear the
 	 * ordered flag and reissue them.  Because we can't tell the higher
 	 * layers directly that they should not issue ordered I/O anymore, they
-	 * need to check if the ordered flag was cleared during I/O completion.
+	 * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
 	 */
 	if ((bp->b_error == EOPNOTSUPP) &&
 	    (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
 		XB_TRACE(bp, "ordered_retry", bp->b_iodone);
 		bp->b_flags &= ~XBF_ORDERED;
+		bp->b_flags |= _XFS_BARRIER_FAILED;
 		xfs_buf_iorequest(bp);
 	} else if (bp->b_iodone)
 		(*(bp->b_iodone))(bp);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index fe01099..456519a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -85,6 +85,14 @@
 	 * modifications being lost.
 	 */
 	_XBF_PAGE_LOCKED = (1 << 22),
+
+	/*
+	 * If we try a barrier write, but it fails we have to communicate
+	 * this to the upper layers.  Unfortunately b_error gets overwritten
+	 * when the buffer is re-issued so we have to add another flag to
+	 * keep this information.
+	 */
+	_XFS_BARRIER_FAILED = (1 << 23),
 } xfs_buf_flags_t;
 
 typedef enum {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 503ea89..0b02c64 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1033,11 +1033,12 @@
 	l = iclog->ic_log;
 
 	/*
-	 * If the ordered flag has been removed by a lower
-	 * layer, it means the underlyin device no longer supports
+	 * If the _XFS_BARRIER_FAILED flag was set by a lower
+	 * layer, it means the underlying device no longer supports
 	 * barrier I/O. Warn loudly and turn off barriers.
 	 */
-	if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) {
+	if (bp->b_flags & _XFS_BARRIER_FAILED) {
+		bp->b_flags &= ~_XFS_BARRIER_FAILED;
 		l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
 		xfs_fs_cmn_err(CE_WARN, l->l_mp,
 				"xlog_iodone: Barriers are no longer supported"
diff --git a/include/asm-x86/a.out-core.h b/include/asm-x86/a.out-core.h
index 714207a..f570576 100644
--- a/include/asm-x86/a.out-core.h
+++ b/include/asm-x86/a.out-core.h
@@ -9,8 +9,8 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
-#ifndef _ASM_A_OUT_CORE_H
-#define _ASM_A_OUT_CORE_H
+#ifndef ASM_X86__A_OUT_CORE_H
+#define ASM_X86__A_OUT_CORE_H
 
 #ifdef __KERNEL__
 #ifdef CONFIG_X86_32
@@ -70,4 +70,4 @@
 
 #endif /* CONFIG_X86_32 */
 #endif /* __KERNEL__ */
-#endif /* _ASM_A_OUT_CORE_H */
+#endif /* ASM_X86__A_OUT_CORE_H */
diff --git a/include/asm-x86/a.out.h b/include/asm-x86/a.out.h
index 4684f97..0948748 100644
--- a/include/asm-x86/a.out.h
+++ b/include/asm-x86/a.out.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_A_OUT_H
-#define _ASM_X86_A_OUT_H
+#ifndef ASM_X86__A_OUT_H
+#define ASM_X86__A_OUT_H
 
 struct exec
 {
@@ -17,4 +17,4 @@
 #define N_DRSIZE(a)	((a).a_drsize)
 #define N_SYMSIZE(a)	((a).a_syms)
 
-#endif /* _ASM_X86_A_OUT_H */
+#endif /* ASM_X86__A_OUT_H */
diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 35d1743..392e173 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ACPI_H
-#define _ASM_X86_ACPI_H
+#ifndef ASM_X86__ACPI_H
+#define ASM_X86__ACPI_H
 
 /*
  *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@ -175,4 +175,4 @@
 
 #define acpi_unlazy_tlb(x)	leave_mm(x)
 
-#endif /*__X86_ASM_ACPI_H*/
+#endif /* ASM_X86__ACPI_H */
diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h
index e4004a9..3617fd4 100644
--- a/include/asm-x86/agp.h
+++ b/include/asm-x86/agp.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_AGP_H
-#define _ASM_X86_AGP_H
+#ifndef ASM_X86__AGP_H
+#define ASM_X86__AGP_H
 
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -32,4 +32,4 @@
 #define free_gatt_pages(table, order)	\
 	free_pages((unsigned long)(table), (order))
 
-#endif
+#endif /* ASM_X86__AGP_H */
diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index f6aa18e..22d3c98 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ALTERNATIVE_H
-#define _ASM_X86_ALTERNATIVE_H
+#ifndef ASM_X86__ALTERNATIVE_H
+#define ASM_X86__ALTERNATIVE_H
 
 #include <linux/types.h>
 #include <linux/stddef.h>
@@ -180,4 +180,4 @@
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_early(void *addr, const void *opcode, size_t len);
 
-#endif /* _ASM_X86_ALTERNATIVE_H */
+#endif /* ASM_X86__ALTERNATIVE_H */
diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h
index 30a1204..783f43e 100644
--- a/include/asm-x86/amd_iommu.h
+++ b/include/asm-x86/amd_iommu.h
@@ -17,8 +17,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_AMD_IOMMU_H
-#define _ASM_X86_AMD_IOMMU_H
+#ifndef ASM_X86__AMD_IOMMU_H
+#define ASM_X86__AMD_IOMMU_H
 
 #ifdef CONFIG_AMD_IOMMU
 extern int amd_iommu_init(void);
@@ -29,4 +29,4 @@
 static inline void amd_iommu_detect(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__AMD_IOMMU_H */
diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index dcc8120..1ffa4e5 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -17,8 +17,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef __AMD_IOMMU_TYPES_H__
-#define __AMD_IOMMU_TYPES_H__
+#ifndef ASM_X86__AMD_IOMMU_TYPES_H
+#define ASM_X86__AMD_IOMMU_TYPES_H
 
 #include <linux/types.h>
 #include <linux/list.h>
@@ -341,4 +341,4 @@
 	return (((u16)bus) << 8) | devfn;
 }
 
-#endif
+#endif /* ASM_X86__AMD_IOMMU_TYPES_H */
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 133c998..65590c9 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_APIC_H
-#define _ASM_X86_APIC_H
+#ifndef ASM_X86__APIC_H
+#define ASM_X86__APIC_H
 
 #include <linux/pm.h>
 #include <linux/delay.h>
@@ -54,6 +54,11 @@
 #endif
 
 extern int is_vsmp_box(void);
+extern void xapic_wait_icr_idle(void);
+extern u32 safe_xapic_wait_icr_idle(void);
+extern u64 xapic_icr_read(void);
+extern void xapic_icr_write(u32, u32);
+extern int setup_profiling_timer(unsigned int);
 
 static inline void native_apic_write(unsigned long reg, u32 v)
 {
@@ -76,9 +81,7 @@
 static inline void ack_APIC_irq(void)
 {
 	/*
-	 * ack_APIC_irq() actually gets compiled as a single instruction:
-	 * - a single rmw on Pentium/82489DX
-	 * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+	 * ack_APIC_irq() actually gets compiled as a single instruction
 	 * ... yummie.
 	 */
 
@@ -128,4 +131,4 @@
 
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
-#endif /* __ASM_APIC_H */
+#endif /* ASM_X86__APIC_H */
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 6b9008c..c40687d 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_APICDEF_H
-#define _ASM_X86_APICDEF_H
+#ifndef ASM_X86__APICDEF_H
+#define ASM_X86__APICDEF_H
 
 /*
  * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
@@ -411,4 +411,4 @@
 #else
  #define BAD_APICID 0xFFFFu
 #endif
-#endif
+#endif /* ASM_X86__APICDEF_H */
diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index 8411750..72adc3a 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_ARCH_HOOKS_H
-#define _ASM_ARCH_HOOKS_H
+#ifndef ASM_X86__ARCH_HOOKS_H
+#define ASM_X86__ARCH_HOOKS_H
 
 #include <linux/interrupt.h>
 
@@ -25,4 +25,4 @@
 extern void time_init_hook(void);
 extern void mca_nmi_hook(void);
 
-#endif
+#endif /* ASM_X86__ARCH_HOOKS_H */
diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 9722032..e1355f4 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ASM_H
-#define _ASM_X86_ASM_H
+#ifndef ASM_X86__ASM_H
+#define ASM_X86__ASM_H
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
@@ -20,17 +20,22 @@
 
 #define _ASM_PTR	__ASM_SEL(.long, .quad)
 #define _ASM_ALIGN	__ASM_SEL(.balign 4, .balign 8)
-#define _ASM_MOV_UL	__ASM_SIZE(mov)
 
+#define _ASM_MOV	__ASM_SIZE(mov)
 #define _ASM_INC	__ASM_SIZE(inc)
 #define _ASM_DEC	__ASM_SIZE(dec)
 #define _ASM_ADD	__ASM_SIZE(add)
 #define _ASM_SUB	__ASM_SIZE(sub)
 #define _ASM_XADD	__ASM_SIZE(xadd)
+
 #define _ASM_AX		__ASM_REG(ax)
 #define _ASM_BX		__ASM_REG(bx)
 #define _ASM_CX		__ASM_REG(cx)
 #define _ASM_DX		__ASM_REG(dx)
+#define _ASM_SP		__ASM_REG(sp)
+#define _ASM_BP		__ASM_REG(bp)
+#define _ASM_SI		__ASM_REG(si)
+#define _ASM_DI		__ASM_REG(di)
 
 /* Exception table entry */
 # define _ASM_EXTABLE(from,to) \
@@ -39,4 +44,4 @@
 	_ASM_PTR #from "," #to "\n" \
 	" .previous\n"
 
-#endif /* _ASM_X86_ASM_H */
+#endif /* ASM_X86__ASM_H */
diff --git a/include/asm-x86/atomic_32.h b/include/asm-x86/atomic_32.h
index 21a4825..14d3f0b 100644
--- a/include/asm-x86/atomic_32.h
+++ b/include/asm-x86/atomic_32.h
@@ -1,5 +1,5 @@
-#ifndef __ARCH_I386_ATOMIC__
-#define __ARCH_I386_ATOMIC__
+#ifndef ASM_X86__ATOMIC_32_H
+#define ASM_X86__ATOMIC_32_H
 
 #include <linux/compiler.h>
 #include <asm/processor.h>
@@ -256,4 +256,4 @@
 #define smp_mb__after_atomic_inc()	barrier()
 
 #include <asm-generic/atomic.h>
-#endif
+#endif /* ASM_X86__ATOMIC_32_H */
diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h
index 91c7d03..2cb218c 100644
--- a/include/asm-x86/atomic_64.h
+++ b/include/asm-x86/atomic_64.h
@@ -1,5 +1,5 @@
-#ifndef __ARCH_X86_64_ATOMIC__
-#define __ARCH_X86_64_ATOMIC__
+#ifndef ASM_X86__ATOMIC_64_H
+#define ASM_X86__ATOMIC_64_H
 
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
@@ -470,4 +470,4 @@
 #define smp_mb__after_atomic_inc()	barrier()
 
 #include <asm-generic/atomic.h>
-#endif
+#endif /* ASM_X86__ATOMIC_64_H */
diff --git a/include/asm-x86/auxvec.h b/include/asm-x86/auxvec.h
index 87f5e6d..12c7cac 100644
--- a/include/asm-x86/auxvec.h
+++ b/include/asm-x86/auxvec.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_AUXVEC_H
-#define _ASM_X86_AUXVEC_H
+#ifndef ASM_X86__AUXVEC_H
+#define ASM_X86__AUXVEC_H
 /*
  * Architecture-neutral AT_ values in 0-17, leave some room
  * for more of them, start the x86-specific ones at 32.
@@ -9,4 +9,4 @@
 #endif
 #define AT_SYSINFO_EHDR		33
 
-#endif
+#endif /* ASM_X86__AUXVEC_H */
diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h
index 0033e50..ec42ed8 100644
--- a/include/asm-x86/bios_ebda.h
+++ b/include/asm-x86/bios_ebda.h
@@ -1,5 +1,5 @@
-#ifndef _MACH_BIOS_EBDA_H
-#define _MACH_BIOS_EBDA_H
+#ifndef ASM_X86__BIOS_EBDA_H
+#define ASM_X86__BIOS_EBDA_H
 
 #include <asm/io.h>
 
@@ -16,4 +16,4 @@
 
 void reserve_ebda_region(void);
 
-#endif /* _MACH_BIOS_EBDA_H */
+#endif /* ASM_X86__BIOS_EBDA_H */
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index cfb2b64..61989b9 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BITOPS_H
-#define _ASM_X86_BITOPS_H
+#ifndef ASM_X86__BITOPS_H
+#define ASM_X86__BITOPS_H
 
 /*
  * Copyright 1992, Linus Torvalds.
@@ -458,4 +458,4 @@
 #include <asm-generic/bitops/minix.h>
 
 #endif /* __KERNEL__ */
-#endif	/* _ASM_X86_BITOPS_H */
+#endif /* ASM_X86__BITOPS_H */
diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h
index 2faed7e..825de5d 100644
--- a/include/asm-x86/boot.h
+++ b/include/asm-x86/boot.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_BOOT_H
-#define _ASM_BOOT_H
+#ifndef ASM_X86__BOOT_H
+#define ASM_X86__BOOT_H
 
 /* Don't touch these, unless you really know what you're doing. */
 #define DEF_INITSEG	0x9000
@@ -25,4 +25,4 @@
 #define BOOT_STACK_SIZE	0x1000
 #endif
 
-#endif /* _ASM_BOOT_H */
+#endif /* ASM_X86__BOOT_H */
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index ae22bdf..ccf027e 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_BOOTPARAM_H
-#define _ASM_BOOTPARAM_H
+#ifndef ASM_X86__BOOTPARAM_H
+#define ASM_X86__BOOTPARAM_H
 
 #include <linux/types.h>
 #include <linux/screen_info.h>
@@ -108,4 +108,4 @@
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
-#endif /* _ASM_BOOTPARAM_H */
+#endif /* ASM_X86__BOOTPARAM_H */
diff --git a/include/asm-x86/bug.h b/include/asm-x86/bug.h
index b69aa64..91ad43a 100644
--- a/include/asm-x86/bug.h
+++ b/include/asm-x86/bug.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BUG_H
-#define _ASM_X86_BUG_H
+#ifndef ASM_X86__BUG_H
+#define ASM_X86__BUG_H
 
 #ifdef CONFIG_BUG
 #define HAVE_ARCH_BUG
@@ -36,4 +36,4 @@
 #endif /* !CONFIG_BUG */
 
 #include <asm-generic/bug.h>
-#endif
+#endif /* ASM_X86__BUG_H */
diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h
index 021cbdd..4761c46 100644
--- a/include/asm-x86/bugs.h
+++ b/include/asm-x86/bugs.h
@@ -1,7 +1,7 @@
-#ifndef _ASM_X86_BUGS_H
-#define _ASM_X86_BUGS_H
+#ifndef ASM_X86__BUGS_H
+#define ASM_X86__BUGS_H
 
 extern void check_bugs(void);
 int ppro_with_ram_bug(void);
 
-#endif /* _ASM_X86_BUGS_H */
+#endif /* ASM_X86__BUGS_H */
diff --git a/include/asm-x86/byteorder.h b/include/asm-x86/byteorder.h
index e02ae2d..722f27d 100644
--- a/include/asm-x86/byteorder.h
+++ b/include/asm-x86/byteorder.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BYTEORDER_H
-#define _ASM_X86_BYTEORDER_H
+#ifndef ASM_X86__BYTEORDER_H
+#define ASM_X86__BYTEORDER_H
 
 #include <asm/types.h>
 #include <linux/compiler.h>
@@ -78,4 +78,4 @@
 
 #include <linux/byteorder/little_endian.h>
 
-#endif /* _ASM_X86_BYTEORDER_H */
+#endif /* ASM_X86__BYTEORDER_H */
diff --git a/include/asm-x86/cache.h b/include/asm-x86/cache.h
index 1e0bac8..ea3f1cc 100644
--- a/include/asm-x86/cache.h
+++ b/include/asm-x86/cache.h
@@ -1,5 +1,5 @@
-#ifndef _ARCH_X86_CACHE_H
-#define _ARCH_X86_CACHE_H
+#ifndef ASM_X86__CACHE_H
+#define ASM_X86__CACHE_H
 
 /* L1 cache line size */
 #define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
@@ -17,4 +17,4 @@
 #endif
 #endif
 
-#endif
+#endif /* ASM_X86__CACHE_H */
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index f4c0ab5..59859cb 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_CACHEFLUSH_H
-#define _ASM_X86_CACHEFLUSH_H
+#ifndef ASM_X86__CACHEFLUSH_H
+#define ASM_X86__CACHEFLUSH_H
 
 /* Keep includes the same across arches.  */
 #include <linux/mm.h>
@@ -112,4 +112,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__CACHEFLUSH_H */
diff --git a/include/asm-x86/calgary.h b/include/asm-x86/calgary.h
index 67f6040..933fd27 100644
--- a/include/asm-x86/calgary.h
+++ b/include/asm-x86/calgary.h
@@ -21,8 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_64_CALGARY_H
-#define _ASM_X86_64_CALGARY_H
+#ifndef ASM_X86__CALGARY_H
+#define ASM_X86__CALGARY_H
 
 #include <linux/spinlock.h>
 #include <linux/device.h>
@@ -69,4 +69,4 @@
 static inline void detect_calgary(void) { return; }
 #endif
 
-#endif /* _ASM_X86_64_CALGARY_H */
+#endif /* ASM_X86__CALGARY_H */
diff --git a/include/asm-x86/checksum_32.h b/include/asm-x86/checksum_32.h
index 52bbb0d..d041e8c 100644
--- a/include/asm-x86/checksum_32.h
+++ b/include/asm-x86/checksum_32.h
@@ -1,5 +1,5 @@
-#ifndef _I386_CHECKSUM_H
-#define _I386_CHECKSUM_H
+#ifndef ASM_X86__CHECKSUM_32_H
+#define ASM_X86__CHECKSUM_32_H
 
 #include <linux/in6.h>
 
@@ -186,4 +186,4 @@
 	return (__force __wsum)-1; /* invalid checksum */
 }
 
-#endif
+#endif /* ASM_X86__CHECKSUM_32_H */
diff --git a/include/asm-x86/checksum_64.h b/include/asm-x86/checksum_64.h
index 8bd861c..110f403 100644
--- a/include/asm-x86/checksum_64.h
+++ b/include/asm-x86/checksum_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_CHECKSUM_H
-#define _X86_64_CHECKSUM_H
+#ifndef ASM_X86__CHECKSUM_64_H
+#define ASM_X86__CHECKSUM_64_H
 
 /*
  * Checksums for x86-64
@@ -188,4 +188,4 @@
 	return a;
 }
 
-#endif
+#endif /* ASM_X86__CHECKSUM_64_H */
diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h
index bf5a69d..0622e45 100644
--- a/include/asm-x86/cmpxchg_32.h
+++ b/include/asm-x86/cmpxchg_32.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_CMPXCHG_H
-#define __ASM_CMPXCHG_H
+#ifndef ASM_X86__CMPXCHG_32_H
+#define ASM_X86__CMPXCHG_32_H
 
 #include <linux/bitops.h> /* for LOCK_PREFIX */
 
@@ -341,4 +341,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__CMPXCHG_32_H */
diff --git a/include/asm-x86/cmpxchg_64.h b/include/asm-x86/cmpxchg_64.h
index 17463cc..63c1a5e 100644
--- a/include/asm-x86/cmpxchg_64.h
+++ b/include/asm-x86/cmpxchg_64.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_CMPXCHG_H
-#define __ASM_CMPXCHG_H
+#ifndef ASM_X86__CMPXCHG_64_H
+#define ASM_X86__CMPXCHG_64_H
 
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
@@ -182,4 +182,4 @@
 	cmpxchg_local((ptr), (o), (n));					\
 })
 
-#endif
+#endif /* ASM_X86__CMPXCHG_64_H */
diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h
index 1793ac3..6732b15 100644
--- a/include/asm-x86/compat.h
+++ b/include/asm-x86/compat.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_COMPAT_H
-#define _ASM_X86_64_COMPAT_H
+#ifndef ASM_X86__COMPAT_H
+#define ASM_X86__COMPAT_H
 
 /*
  * Architecture specific compatibility types
@@ -215,4 +215,4 @@
 	return current_thread_info()->status & TS_COMPAT;
 }
 
-#endif /* _ASM_X86_64_COMPAT_H */
+#endif /* ASM_X86__COMPAT_H */
diff --git a/include/asm-x86/cpu.h b/include/asm-x86/cpu.h
index 73f2ea8..83a1150 100644
--- a/include/asm-x86/cpu.h
+++ b/include/asm-x86/cpu.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_I386_CPU_H_
-#define _ASM_I386_CPU_H_
+#ifndef ASM_X86__CPU_H
+#define ASM_X86__CPU_H
 
 #include <linux/device.h>
 #include <linux/cpu.h>
@@ -17,4 +17,4 @@
 #endif
 
 DECLARE_PER_CPU(int, cpu_state);
-#endif /* _ASM_I386_CPU_H_ */
+#endif /* ASM_X86__CPU_H */
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index cfcfb0a..250fa0c 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -1,8 +1,8 @@
 /*
  * Defines x86 CPU feature bits
  */
-#ifndef _ASM_X86_CPUFEATURE_H
-#define _ASM_X86_CPUFEATURE_H
+#ifndef ASM_X86__CPUFEATURE_H
+#define ASM_X86__CPUFEATURE_H
 
 #include <asm/required-features.h>
 
@@ -224,4 +224,4 @@
 
 #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
 
-#endif /* _ASM_X86_CPUFEATURE_H */
+#endif /* ASM_X86__CPUFEATURE_H */
diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h
index 7515c19..a863ead 100644
--- a/include/asm-x86/current.h
+++ b/include/asm-x86/current.h
@@ -1,5 +1,5 @@
-#ifndef _X86_CURRENT_H
-#define _X86_CURRENT_H
+#ifndef ASM_X86__CURRENT_H
+#define ASM_X86__CURRENT_H
 
 #ifdef CONFIG_X86_32
 #include <linux/compiler.h>
@@ -36,4 +36,4 @@
 
 #define current get_current()
 
-#endif /* X86_CURRENT_H */
+#endif /* ASM_X86__CURRENT_H */
diff --git a/include/asm-x86/debugreg.h b/include/asm-x86/debugreg.h
index c6344d5..ecb6907 100644
--- a/include/asm-x86/debugreg.h
+++ b/include/asm-x86/debugreg.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DEBUGREG_H
-#define _ASM_X86_DEBUGREG_H
+#ifndef ASM_X86__DEBUGREG_H
+#define ASM_X86__DEBUGREG_H
 
 
 /* Indicate the register numbers for a number of the specific
@@ -67,4 +67,4 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
-#endif
+#endif /* ASM_X86__DEBUGREG_H */
diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h
index 409a649..8a0da95 100644
--- a/include/asm-x86/delay.h
+++ b/include/asm-x86/delay.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DELAY_H
-#define _ASM_X86_DELAY_H
+#ifndef ASM_X86__DELAY_H
+#define ASM_X86__DELAY_H
 
 /*
  * Copyright (C) 1993 Linus Torvalds
@@ -28,4 +28,4 @@
 
 void use_tsc_delay(void);
 
-#endif /* _ASM_X86_DELAY_H */
+#endif /* ASM_X86__DELAY_H */
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index a44c4dc..b73fea5 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_DESC_H_
-#define _ASM_DESC_H_
+#ifndef ASM_X86__DESC_H
+#define ASM_X86__DESC_H
 
 #ifndef __ASSEMBLY__
 #include <asm/desc_defs.h>
@@ -397,4 +397,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__DESC_H */
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index f7bacf3..b881db6 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -1,6 +1,6 @@
 /* Written 2000 by Andi Kleen */
-#ifndef __ARCH_DESC_DEFS_H
-#define __ARCH_DESC_DEFS_H
+#ifndef ASM_X86__DESC_DEFS_H
+#define ASM_X86__DESC_DEFS_H
 
 /*
  * Segment descriptor structure definitions, usable from both x86_64 and i386
@@ -92,4 +92,4 @@
 
 #endif /* !__ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__DESC_DEFS_H */
diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h
index 3c034f4..1bece04 100644
--- a/include/asm-x86/device.h
+++ b/include/asm-x86/device.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DEVICE_H
-#define _ASM_X86_DEVICE_H
+#ifndef ASM_X86__DEVICE_H
+#define ASM_X86__DEVICE_H
 
 struct dev_archdata {
 #ifdef CONFIG_ACPI
@@ -13,4 +13,4 @@
 #endif
 };
 
-#endif /* _ASM_X86_DEVICE_H */
+#endif /* ASM_X86__DEVICE_H */
diff --git a/include/asm-x86/div64.h b/include/asm-x86/div64.h
index 9a2d644..f9530f2 100644
--- a/include/asm-x86/div64.h
+++ b/include/asm-x86/div64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DIV64_H
-#define _ASM_X86_DIV64_H
+#ifndef ASM_X86__DIV64_H
+#define ASM_X86__DIV64_H
 
 #ifdef CONFIG_X86_32
 
@@ -57,4 +57,4 @@
 # include <asm-generic/div64.h>
 #endif /* CONFIG_X86_32 */
 
-#endif /* _ASM_X86_DIV64_H */
+#endif /* ASM_X86__DIV64_H */
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index ad9cd6d..5d200e7 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_DMA_MAPPING_H_
-#define _ASM_DMA_MAPPING_H_
+#ifndef ASM_X86__DMA_MAPPING_H
+#define ASM_X86__DMA_MAPPING_H
 
 /*
  * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
@@ -250,4 +250,4 @@
 #define dma_is_consistent(d, h)	(1)
 
 #include <asm-generic/dma-coherent.h>
-#endif
+#endif /* ASM_X86__DMA_MAPPING_H */
diff --git a/include/asm-x86/dma.h b/include/asm-x86/dma.h
index ca1098a..c9f7a4e 100644
--- a/include/asm-x86/dma.h
+++ b/include/asm-x86/dma.h
@@ -5,8 +5,8 @@
  * and John Boyd, Nov. 1992.
  */
 
-#ifndef _ASM_X86_DMA_H
-#define _ASM_X86_DMA_H
+#ifndef ASM_X86__DMA_H
+#define ASM_X86__DMA_H
 
 #include <linux/spinlock.h>	/* And spinlocks */
 #include <asm/io.h>		/* need byte IO */
@@ -315,4 +315,4 @@
 #define isa_dma_bridge_buggy	(0)
 #endif
 
-#endif /* _ASM_X86_DMA_H */
+#endif /* ASM_X86__DMA_H */
diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h
index 58a8657..1cff6fe 100644
--- a/include/asm-x86/dmi.h
+++ b/include/asm-x86/dmi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DMI_H
-#define _ASM_X86_DMI_H
+#ifndef ASM_X86__DMI_H
+#define ASM_X86__DMI_H
 
 #include <asm/io.h>
 
@@ -23,4 +23,4 @@
 #define dmi_ioremap early_ioremap
 #define dmi_iounmap early_iounmap
 
-#endif
+#endif /* ASM_X86__DMI_H */
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
index 7881368..c3c953a 100644
--- a/include/asm-x86/ds.h
+++ b/include/asm-x86/ds.h
@@ -2,71 +2,237 @@
  * Debug Store (DS) support
  *
  * This provides a low-level interface to the hardware's Debug Store
- * feature that is used for last branch recording (LBR) and
+ * feature that is used for branch trace store (BTS) and
  * precise-event based sampling (PEBS).
  *
- * Different architectures use a different DS layout/pointer size.
- * The below functions therefore work on a void*.
+ * It manages:
+ * - per-thread and per-cpu allocation of BTS and PEBS
+ * - buffer memory allocation (optional)
+ * - buffer overflow handling
+ * - buffer access
+ *
+ * It assumes:
+ * - get_task_struct on all parameter tasks
+ * - current is allowed to trace parameter tasks
  *
  *
- * Since there is no user for PEBS, yet, only LBR (or branch
- * trace store, BTS) is supported.
- *
- *
- * Copyright (C) 2007 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ * Copyright (C) 2007-2008 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
  */
 
-#ifndef _ASM_X86_DS_H
-#define _ASM_X86_DS_H
+#ifndef ASM_X86__DS_H
+#define ASM_X86__DS_H
+
+#ifdef CONFIG_X86_DS
 
 #include <linux/types.h>
 #include <linux/init.h>
 
-struct cpuinfo_x86;
 
+struct task_struct;
 
-/* a branch trace record entry
+/*
+ * Request BTS or PEBS
  *
- * In order to unify the interface between various processor versions,
- * we use the below data structure for all processors.
+ * Due to alignement constraints, the actual buffer may be slightly
+ * smaller than the requested or provided buffer.
+ *
+ * Returns 0 on success; -Eerrno otherwise
+ *
+ * task: the task to request recording for;
+ *       NULL for per-cpu recording on the current cpu
+ * base: the base pointer for the (non-pageable) buffer;
+ *       NULL if buffer allocation requested
+ * size: the size of the requested or provided buffer
+ * ovfl: pointer to a function to be called on buffer overflow;
+ *       NULL if cyclic buffer requested
  */
-enum bts_qualifier {
-	BTS_INVALID = 0,
-	BTS_BRANCH,
-	BTS_TASK_ARRIVES,
-	BTS_TASK_DEPARTS
+typedef void (*ds_ovfl_callback_t)(struct task_struct *);
+extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
+			  ds_ovfl_callback_t ovfl);
+extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
+			   ds_ovfl_callback_t ovfl);
+
+/*
+ * Release BTS or PEBS resources
+ *
+ * Frees buffers allocated on ds_request.
+ *
+ * Returns 0 on success; -Eerrno otherwise
+ *
+ * task: the task to release resources for;
+ *       NULL to release resources for the current cpu
+ */
+extern int ds_release_bts(struct task_struct *task);
+extern int ds_release_pebs(struct task_struct *task);
+
+/*
+ * Return the (array) index of the write pointer.
+ * (assuming an array of BTS/PEBS records)
+ *
+ * Returns -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * pos (out): if not NULL, will hold the result
+ */
+extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
+extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
+
+/*
+ * Return the (array) index one record beyond the end of the array.
+ * (assuming an array of BTS/PEBS records)
+ *
+ * Returns -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * pos (out): if not NULL, will hold the result
+ */
+extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
+extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
+
+/*
+ * Provide a pointer to the BTS/PEBS record at parameter index.
+ * (assuming an array of BTS/PEBS records)
+ *
+ * The pointer points directly into the buffer. The user is
+ * responsible for copying the record.
+ *
+ * Returns the size of a single record on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * index: the index of the requested record
+ * record (out): pointer to the requested record
+ */
+extern int ds_access_bts(struct task_struct *task,
+			 size_t index, const void **record);
+extern int ds_access_pebs(struct task_struct *task,
+			  size_t index, const void **record);
+
+/*
+ * Write one or more BTS/PEBS records at the write pointer index and
+ * advance the write pointer.
+ *
+ * If size is not a multiple of the record size, trailing bytes are
+ * zeroed out.
+ *
+ * May result in one or more overflow notifications.
+ *
+ * If called during overflow handling, that is, with index >=
+ * interrupt threshold, the write will wrap around.
+ *
+ * An overflow notification is given if and when the interrupt
+ * threshold is reached during or after the write.
+ *
+ * Returns the number of bytes written or -Eerrno.
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * buffer: the buffer to write
+ * size: the size of the buffer
+ */
+extern int ds_write_bts(struct task_struct *task,
+			const void *buffer, size_t size);
+extern int ds_write_pebs(struct task_struct *task,
+			 const void *buffer, size_t size);
+
+/*
+ * Same as ds_write_bts/pebs, but omit ownership checks.
+ *
+ * This is needed to have some other task than the owner of the
+ * BTS/PEBS buffer or the parameter task itself write into the
+ * respective buffer.
+ */
+extern int ds_unchecked_write_bts(struct task_struct *task,
+				  const void *buffer, size_t size);
+extern int ds_unchecked_write_pebs(struct task_struct *task,
+				   const void *buffer, size_t size);
+
+/*
+ * Reset the write pointer of the BTS/PEBS buffer.
+ *
+ * Returns 0 on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ */
+extern int ds_reset_bts(struct task_struct *task);
+extern int ds_reset_pebs(struct task_struct *task);
+
+/*
+ * Clear the BTS/PEBS buffer and reset the write pointer.
+ * The entire buffer will be zeroed out.
+ *
+ * Returns 0 on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ */
+extern int ds_clear_bts(struct task_struct *task);
+extern int ds_clear_pebs(struct task_struct *task);
+
+/*
+ * Provide the PEBS counter reset value.
+ *
+ * Returns 0 on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * value (out): the counter reset value
+ */
+extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
+
+/*
+ * Set the PEBS counter reset value.
+ *
+ * Returns 0 on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * value: the new counter reset value
+ */
+extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
+
+/*
+ * Initialization
+ */
+struct cpuinfo_x86;
+extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
+
+
+
+/*
+ * The DS context - part of struct thread_struct.
+ */
+struct ds_context {
+	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
+	unsigned char *ds;
+	/* the owner of the BTS and PEBS configuration, respectively */
+	struct task_struct *owner[2];
+	/* buffer overflow notification function for BTS and PEBS */
+	ds_ovfl_callback_t callback[2];
+	/* the original buffer address */
+	void *buffer[2];
+	/* the number of allocated pages for on-request allocated buffers */
+	unsigned int pages[2];
+	/* use count */
+	unsigned long count;
+	/* a pointer to the context location inside the thread_struct
+	 * or the per_cpu context array */
+	struct ds_context **this;
+	/* a pointer to the task owning this context, or NULL, if the
+	 * context is owned by a cpu */
+	struct task_struct *task;
 };
 
-struct bts_struct {
-	u64 qualifier;
-	union {
-		/* BTS_BRANCH */
-		struct {
-			u64 from_ip;
-			u64 to_ip;
-		} lbr;
-		/* BTS_TASK_ARRIVES or
-		   BTS_TASK_DEPARTS */
-		u64 jiffies;
-	} variant;
-};
+/* called by exit_thread() to free leftover contexts */
+extern void ds_free(struct ds_context *context);
 
-/* Overflow handling mechanisms */
-#define DS_O_SIGNAL	1 /* send overflow signal */
-#define DS_O_WRAP	2 /* wrap around */
+#else /* CONFIG_X86_DS */
 
-extern int ds_allocate(void **, size_t);
-extern int ds_free(void **);
-extern int ds_get_bts_size(void *);
-extern int ds_get_bts_end(void *);
-extern int ds_get_bts_index(void *);
-extern int ds_set_overflow(void *, int);
-extern int ds_get_overflow(void *);
-extern int ds_clear(void *);
-extern int ds_read_bts(void *, int, struct bts_struct *);
-extern int ds_write_bts(void *, const struct bts_struct *);
-extern unsigned long ds_debugctl_mask(void);
-extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
+#define ds_init_intel(config) do {} while (0)
 
-#endif /* _ASM_X86_DS_H */
+#endif /* CONFIG_X86_DS */
+#endif /* ASM_X86__DS_H */
diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h
index 738bb9f..21d1bc3 100644
--- a/include/asm-x86/dwarf2.h
+++ b/include/asm-x86/dwarf2.h
@@ -1,5 +1,5 @@
-#ifndef _DWARF2_H
-#define _DWARF2_H
+#ifndef ASM_X86__DWARF2_H
+#define ASM_X86__DWARF2_H
 
 #ifndef __ASSEMBLY__
 #warning "asm/dwarf2.h should be only included in pure assembly files"
@@ -58,4 +58,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__DWARF2_H */
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 16a31e2..f52daf1 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_E820_H
-#define __ASM_E820_H
+#ifndef ASM_X86__E820_H
+#define ASM_X86__E820_H
 #define E820MAP	0x2d0		/* our map */
 #define E820MAX	128		/* number of entries in E820MAP */
 
@@ -64,6 +64,7 @@
 extern struct e820map e820;
 extern struct e820map e820_saved;
 
+extern unsigned long pci_mem_start;
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern int e820_all_mapped(u64 start, u64 end, unsigned type);
 extern void e820_add_region(u64 start, u64 size, int type);
@@ -140,4 +141,4 @@
 #define HIGH_MEMORY	(1024*1024)
 #endif /* __KERNEL__ */
 
-#endif  /* __ASM_E820_H */
+#endif /* ASM_X86__E820_H */
diff --git a/include/asm-x86/edac.h b/include/asm-x86/edac.h
index a8088f6..9493c5b 100644
--- a/include/asm-x86/edac.h
+++ b/include/asm-x86/edac.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_EDAC_H
-#define _ASM_X86_EDAC_H
+#ifndef ASM_X86__EDAC_H
+#define ASM_X86__EDAC_H
 
 /* ECC atomic, DMA, SMP and interrupt safe scrub function */
 
@@ -15,4 +15,4 @@
 		asm volatile("lock; addl $0, %0"::"m" (*virt_addr));
 }
 
-#endif
+#endif /* ASM_X86__EDAC_H */
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index d4f2b0a..ed2de22 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_EFI_H
-#define _ASM_X86_EFI_H
+#ifndef ASM_X86__EFI_H
+#define ASM_X86__EFI_H
 
 #ifdef CONFIG_X86_32
 
@@ -94,4 +94,4 @@
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
-#endif
+#endif /* ASM_X86__EFI_H */
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 7be4733..5c4745b 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ELF_H
-#define _ASM_X86_ELF_H
+#ifndef ASM_X86__ELF_H
+#define ASM_X86__ELF_H
 
 /*
  * ELF register definitions..
@@ -148,8 +148,9 @@
 
 static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp)
 {
-	asm volatile("movl %0,%%fs" :: "r" (0));
-	asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS));
+	loadsegment(fs, 0);
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 	load_gs_index(0);
 	regs->ip = ip;
 	regs->sp = sp;
@@ -332,4 +333,4 @@
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
-#endif
+#endif /* ASM_X86__ELF_H */
diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h
index 8e6aef1..190d0d8 100644
--- a/include/asm-x86/emergency-restart.h
+++ b/include/asm-x86/emergency-restart.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
+#ifndef ASM_X86__EMERGENCY_RESTART_H
+#define ASM_X86__EMERGENCY_RESTART_H
 
 enum reboot_type {
 	BOOT_TRIPLE = 't',
@@ -15,4 +15,4 @@
 
 extern void machine_emergency_restart(void);
 
-#endif /* _ASM_EMERGENCY_RESTART_H */
+#endif /* ASM_X86__EMERGENCY_RESTART_H */
diff --git a/include/asm-x86/fb.h b/include/asm-x86/fb.h
index 5301846..aca38dbd 100644
--- a/include/asm-x86/fb.h
+++ b/include/asm-x86/fb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FB_H
-#define _ASM_X86_FB_H
+#ifndef ASM_X86__FB_H
+#define ASM_X86__FB_H
 
 #include <linux/fb.h>
 #include <linux/fs.h>
@@ -18,4 +18,4 @@
 static inline int fb_is_primary_device(struct fb_info *info) { return 0; }
 #endif
 
-#endif /* _ASM_X86_FB_H */
+#endif /* ASM_X86__FB_H */
diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 44d4f82..78e33a1 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_FIXMAP_H
-#define _ASM_FIXMAP_H
+#ifndef ASM_X86__FIXMAP_H
+#define ASM_X86__FIXMAP_H
 
 #ifdef CONFIG_X86_32
 # include "fixmap_32.h"
@@ -65,4 +65,4 @@
 	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
 	return __virt_to_fix(vaddr);
 }
-#endif
+#endif /* ASM_X86__FIXMAP_H */
diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index f1ac2b2..784e3e7 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -10,8 +10,8 @@
  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
-#ifndef _ASM_FIXMAP_32_H
-#define _ASM_FIXMAP_32_H
+#ifndef ASM_X86__FIXMAP_32_H
+#define ASM_X86__FIXMAP_32_H
 
 
 /* used by vmalloc.c, vsyscall.lds.S.
@@ -120,4 +120,4 @@
 #define FIXADDR_BOOT_START	(FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
 
 #endif /* !__ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__FIXMAP_32_H */
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 00f3d74..dafb24b 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -8,8 +8,8 @@
  * Copyright (C) 1998 Ingo Molnar
  */
 
-#ifndef _ASM_FIXMAP_64_H
-#define _ASM_FIXMAP_64_H
+#ifndef ASM_X86__FIXMAP_64_H
+#define ASM_X86__FIXMAP_64_H
 
 #include <linux/kernel.h>
 #include <asm/acpi.h>
@@ -80,4 +80,4 @@
 #define FIXADDR_USER_START	((unsigned long)VSYSCALL32_VSYSCALL)
 #define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
 
-#endif
+#endif /* ASM_X86__FIXMAP_64_H */
diff --git a/include/asm-x86/floppy.h b/include/asm-x86/floppy.h
index dbe82a5..7d83a3a 100644
--- a/include/asm-x86/floppy.h
+++ b/include/asm-x86/floppy.h
@@ -7,8 +7,8 @@
  *
  * Copyright (C) 1995
  */
-#ifndef _ASM_X86_FLOPPY_H
-#define _ASM_X86_FLOPPY_H
+#ifndef ASM_X86__FLOPPY_H
+#define ASM_X86__FLOPPY_H
 
 #include <linux/vmalloc.h>
 
@@ -278,4 +278,4 @@
 
 #define EXTRA_FLOPPY_PARAMS
 
-#endif /* _ASM_X86_FLOPPY_H */
+#endif /* ASM_X86__FLOPPY_H */
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index 5c68b32..be0e004 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FTRACE
-#define _ASM_X86_FTRACE
+#ifndef ASM_X86__FTRACE_H
+#define ASM_X86__FTRACE_H
 
 #ifdef CONFIG_FTRACE
 #define MCOUNT_ADDR		((long)(mcount))
@@ -11,4 +11,4 @@
 
 #endif /* CONFIG_FTRACE */
 
-#endif /* _ASM_X86_FTRACE */
+#endif /* ASM_X86__FTRACE_H */
diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index e7a76b3..06b924e 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FUTEX_H
-#define _ASM_X86_FUTEX_H
+#ifndef ASM_X86__FUTEX_H
+#define ASM_X86__FUTEX_H
 
 #ifdef __KERNEL__
 
@@ -25,7 +25,7 @@
 	asm volatile("1:\tmovl	%2, %0\n"			\
 		     "\tmovl\t%0, %3\n"				\
 		     "\t" insn "\n"				\
-		     "2:\tlock; cmpxchgl %3, %2\n"		\
+		     "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"	\
 		     "\tjnz\t1b\n"				\
 		     "3:\t.section .fixup,\"ax\"\n"		\
 		     "4:\tmov\t%5, %1\n"			\
@@ -64,7 +64,7 @@
 		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
 		break;
 	case FUTEX_OP_ADD:
-		__futex_atomic_op1("lock; xaddl %0, %2", ret, oldval,
+		__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
 				   uaddr, oparg);
 		break;
 	case FUTEX_OP_OR:
@@ -122,7 +122,7 @@
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	asm volatile("1:\tlock; cmpxchgl %3, %1\n"
+	asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n"
 		     "2:\t.section .fixup, \"ax\"\n"
 		     "3:\tmov     %2, %0\n"
 		     "\tjmp     2b\n"
@@ -137,4 +137,4 @@
 }
 
 #endif
-#endif
+#endif /* ASM_X86__FUTEX_H */
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 3f62a83..baa54fa 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_GART_H
-#define _ASM_X8664_GART_H 1
+#ifndef ASM_X86__GART_H
+#define ASM_X86__GART_H
 
 #include <asm/e820.h>
 
@@ -52,15 +52,15 @@
 		return 0;
 
 	if (aper_base + aper_size > 0x100000000ULL) {
-		printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
+		printk(KERN_INFO "Aperture beyond 4GB. Ignoring.\n");
 		return 0;
 	}
 	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
-		printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
+		printk(KERN_INFO "Aperture pointing to e820 RAM. Ignoring.\n");
 		return 0;
 	}
 	if (aper_size < min_size) {
-		printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n",
+		printk(KERN_INFO "Aperture too small (%d MB) than (%d MB)\n",
 				 aper_size>>20, min_size>>20);
 		return 0;
 	}
@@ -68,4 +68,4 @@
 	return 1;
 }
 
-#endif
+#endif /* ASM_X86__GART_H */
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 754d635..34280f0 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_GENAPIC_H
-#define _ASM_GENAPIC_H 1
+#ifndef ASM_X86__GENAPIC_32_H
+#define ASM_X86__GENAPIC_32_H
 
 #include <asm/mpspec.h>
 
@@ -121,4 +121,4 @@
 #define uv_system_init()		do {} while (0)
 
 
-#endif
+#endif /* ASM_X86__GENAPIC_32_H */
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index a47d631..25097a8 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_GENAPIC_H
-#define _ASM_GENAPIC_H 1
+#ifndef ASM_X86__GENAPIC_64_H
+#define ASM_X86__GENAPIC_64_H
 
 /*
  * Copyright 2004 James Cleverdon, IBM.
@@ -47,4 +47,4 @@
 
 extern void setup_apic_routing(void);
 
-#endif
+#endif /* ASM_X86__GENAPIC_64_H */
diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h
index 2c1cda0..3f3444b 100644
--- a/include/asm-x86/geode.h
+++ b/include/asm-x86/geode.h
@@ -7,8 +7,8 @@
  * as published by the Free Software Foundation.
  */
 
-#ifndef _ASM_GEODE_H_
-#define _ASM_GEODE_H_
+#ifndef ASM_X86__GEODE_H
+#define ASM_X86__GEODE_H
 
 #include <asm/processor.h>
 #include <linux/io.h>
@@ -250,4 +250,4 @@
 static inline int mfgpt_timer_setup(void) { return 0; }
 #endif
 
-#endif
+#endif /* ASM_X86__GEODE_H */
diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
index c4c91b3..497fb98 100644
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h
@@ -53,4 +53,4 @@
 
 #endif /* CONFIG_GPIOLIB */
 
-#endif /* _ASM_I386_GPIO_H */
+#endif /* ASM_X86__GPIO_H */
diff --git a/include/asm-x86/hardirq_32.h b/include/asm-x86/hardirq_32.h
index 4f85f0f..700fe23 100644
--- a/include/asm-x86/hardirq_32.h
+++ b/include/asm-x86/hardirq_32.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
+#ifndef ASM_X86__HARDIRQ_32_H
+#define ASM_X86__HARDIRQ_32_H
 
 #include <linux/threads.h>
 #include <linux/irq.h>
@@ -25,4 +25,4 @@
 void ack_bad_irq(unsigned int irq);
 #include <linux/irq_cpustat.h>
 
-#endif /* __ASM_HARDIRQ_H */
+#endif /* ASM_X86__HARDIRQ_32_H */
diff --git a/include/asm-x86/hardirq_64.h b/include/asm-x86/hardirq_64.h
index 95d5e09..f8bd291 100644
--- a/include/asm-x86/hardirq_64.h
+++ b/include/asm-x86/hardirq_64.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
+#ifndef ASM_X86__HARDIRQ_64_H
+#define ASM_X86__HARDIRQ_64_H
 
 #include <linux/threads.h>
 #include <linux/irq.h>
@@ -20,4 +20,4 @@
 
 extern void ack_bad_irq(unsigned int irq);
 
-#endif /* __ASM_HARDIRQ_H */
+#endif /* ASM_X86__HARDIRQ_64_H */
diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h
index 4514b16..bc3f6a2 100644
--- a/include/asm-x86/highmem.h
+++ b/include/asm-x86/highmem.h
@@ -15,8 +15,8 @@
  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
  */
 
-#ifndef _ASM_HIGHMEM_H
-#define _ASM_HIGHMEM_H
+#ifndef ASM_X86__HIGHMEM_H
+#define ASM_X86__HIGHMEM_H
 
 #ifdef __KERNEL__
 
@@ -79,4 +79,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* _ASM_HIGHMEM_H */
+#endif /* ASM_X86__HIGHMEM_H */
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index 82f1ac6..cbbbb6d 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -1,5 +1,5 @@
-#ifndef ASM_X86_HPET_H
-#define ASM_X86_HPET_H
+#ifndef ASM_X86__HPET_H
+#define ASM_X86__HPET_H
 
 #ifdef CONFIG_HPET_TIMER
 
@@ -90,4 +90,4 @@
 #define hpet_readl(a) 0
 
 #endif
-#endif /* ASM_X86_HPET_H */
+#endif /* ASM_X86__HPET_H */
diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 439a9ac..0b7ec5d 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_HUGETLB_H
-#define _ASM_X86_HUGETLB_H
+#ifndef ASM_X86__HUGETLB_H
+#define ASM_X86__HUGETLB_H
 
 #include <asm/page.h>
 
@@ -90,4 +90,4 @@
 {
 }
 
-#endif /* _ASM_X86_HUGETLB_H */
+#endif /* ASM_X86__HUGETLB_H */
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index edd0b95..65997b1 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_HW_IRQ_H
-#define _ASM_HW_IRQ_H
+#ifndef ASM_X86__HW_IRQ_H
+#define ASM_X86__HW_IRQ_H
 
 /*
  * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
@@ -93,6 +93,26 @@
 extern asmlinkage void qic_enable_irq_interrupt(void);
 extern asmlinkage void qic_call_function_interrupt(void);
 
+/* SMP */
+extern void smp_apic_timer_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_32
+extern void smp_spurious_interrupt(struct pt_regs *);
+extern void smp_error_interrupt(struct pt_regs *);
+#else
+extern asmlinkage void smp_spurious_interrupt(void);
+extern asmlinkage void smp_error_interrupt(void);
+#endif
+#ifdef CONFIG_X86_SMP
+extern void smp_reschedule_interrupt(struct pt_regs *);
+extern void smp_call_function_interrupt(struct pt_regs *);
+extern void smp_call_function_single_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_32
+extern void smp_invalidate_interrupt(struct pt_regs *);
+#else
+extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
+#endif
+#endif
+
 #ifdef CONFIG_X86_32
 extern void (*const interrupt[NR_IRQS])(void);
 #else
@@ -112,4 +132,4 @@
 
 #endif /* !ASSEMBLY_ */
 
-#endif
+#endif /* ASM_X86__HW_IRQ_H */
diff --git a/include/asm-x86/hypertransport.h b/include/asm-x86/hypertransport.h
index d2bbd23..cc011a3 100644
--- a/include/asm-x86/hypertransport.h
+++ b/include/asm-x86/hypertransport.h
@@ -1,5 +1,5 @@
-#ifndef ASM_HYPERTRANSPORT_H
-#define ASM_HYPERTRANSPORT_H
+#ifndef ASM_X86__HYPERTRANSPORT_H
+#define ASM_X86__HYPERTRANSPORT_H
 
 /*
  * Constants for x86 Hypertransport Interrupts.
@@ -42,4 +42,4 @@
 #define HT_IRQ_HIGH_DEST_ID(v)						\
 	((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
 
-#endif /* ASM_HYPERTRANSPORT_H */
+#endif /* ASM_X86__HYPERTRANSPORT_H */
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 56d00e3..1ecdc3e 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -7,8 +7,8 @@
  * x86-64 work by Andi Kleen 2002
  */
 
-#ifndef _ASM_X86_I387_H
-#define _ASM_X86_I387_H
+#ifndef ASM_X86__I387_H
+#define ASM_X86__I387_H
 
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
@@ -25,6 +25,7 @@
 extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
 extern void init_thread_xstate(void);
+extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
@@ -336,4 +337,4 @@
 	}
 }
 
-#endif	/* _ASM_X86_I387_H */
+#endif /* ASM_X86__I387_H */
diff --git a/include/asm-x86/i8253.h b/include/asm-x86/i8253.h
index b51c048..15a5b53 100644
--- a/include/asm-x86/i8253.h
+++ b/include/asm-x86/i8253.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_I8253_H__
-#define __ASM_I8253_H__
+#ifndef ASM_X86__I8253_H
+#define ASM_X86__I8253_H
 
 /* i8253A PIT registers */
 #define PIT_MODE		0x43
@@ -15,4 +15,4 @@
 #define inb_pit		inb_p
 #define outb_pit	outb_p
 
-#endif	/* __ASM_I8253_H__ */
+#endif /* ASM_X86__I8253_H */
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 2f98df9..c586559 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_I8259_H__
-#define __ASM_I8259_H__
+#ifndef ASM_X86__I8259_H
+#define ASM_X86__I8259_H
 
 #include <linux/delay.h>
 
@@ -57,4 +57,4 @@
 
 extern struct irq_chip i8259A_chip;
 
-#endif	/* __ASM_I8259_H__ */
+#endif /* ASM_X86__I8259_H */
diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h
index 55d3abe..f932f7a 100644
--- a/include/asm-x86/ia32.h
+++ b/include/asm-x86/ia32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IA32_H
-#define _ASM_X86_64_IA32_H
+#ifndef ASM_X86__IA32_H
+#define ASM_X86__IA32_H
 
 
 #ifdef CONFIG_IA32_EMULATION
@@ -167,4 +167,4 @@
 
 #endif /* !CONFIG_IA32_SUPPORT */
 
-#endif
+#endif /* ASM_X86__IA32_H */
diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h
index 61cea9e..dbd887d 100644
--- a/include/asm-x86/ia32_unistd.h
+++ b/include/asm-x86/ia32_unistd.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IA32_UNISTD_H_
-#define _ASM_X86_64_IA32_UNISTD_H_
+#ifndef ASM_X86__IA32_UNISTD_H
+#define ASM_X86__IA32_UNISTD_H
 
 /*
  * This file contains the system call numbers of the ia32 port,
@@ -15,4 +15,4 @@
 #define __NR_ia32_sigreturn	119
 #define __NR_ia32_rt_sigreturn	173
 
-#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
+#endif /* ASM_X86__IA32_UNISTD_H */
diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h
index cbb6491..baa3f783 100644
--- a/include/asm-x86/idle.h
+++ b/include/asm-x86/idle.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IDLE_H
-#define _ASM_X86_64_IDLE_H 1
+#ifndef ASM_X86__IDLE_H
+#define ASM_X86__IDLE_H
 
 #define IDLE_START 1
 #define IDLE_END 2
@@ -12,4 +12,4 @@
 
 void c1e_remove_cpu(int cpu);
 
-#endif
+#endif /* ASM_X86__IDLE_H */
diff --git a/include/asm-x86/intel_arch_perfmon.h b/include/asm-x86/intel_arch_perfmon.h
index fa0fd06..07c03c6 100644
--- a/include/asm-x86/intel_arch_perfmon.h
+++ b/include/asm-x86/intel_arch_perfmon.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
-#define _ASM_X86_INTEL_ARCH_PERFMON_H
+#ifndef ASM_X86__INTEL_ARCH_PERFMON_H
+#define ASM_X86__INTEL_ARCH_PERFMON_H
 
 #define MSR_ARCH_PERFMON_PERFCTR0		0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1		0xc2
@@ -28,4 +28,4 @@
 	unsigned int full;
 };
 
-#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
+#endif /* ASM_X86__INTEL_ARCH_PERFMON_H */
diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h
index 0f954dc..72b7719 100644
--- a/include/asm-x86/io.h
+++ b/include/asm-x86/io.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IO_H
-#define _ASM_X86_IO_H
+#ifndef ASM_X86__IO_H
+#define ASM_X86__IO_H
 
 #define ARCH_HAS_IOREMAP_WC
 
@@ -73,6 +73,8 @@
 #define writeq writeq
 #endif
 
+extern int iommu_bio_merge;
+
 #ifdef CONFIG_X86_32
 # include "io_32.h"
 #else
@@ -99,4 +101,4 @@
 extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 
 
-#endif /* _ASM_X86_IO_H */
+#endif /* ASM_X86__IO_H */
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index e876d89..4f7d878 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
+#ifndef ASM_X86__IO_32_H
+#define ASM_X86__IO_32_H
 
 #include <linux/string.h>
 #include <linux/compiler.h>
@@ -281,4 +281,4 @@
 BUILDIO(w, w, short)
 BUILDIO(l, , int)
 
-#endif
+#endif /* ASM_X86__IO_32_H */
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index 22995c5..64429e9 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
+#ifndef ASM_X86__IO_64_H
+#define ASM_X86__IO_64_H
 
 
 /*
@@ -235,7 +235,6 @@
 
 #define flush_write_buffers()
 
-extern int iommu_bio_merge;
 #define BIO_VMERGE_BOUNDARY iommu_bio_merge
 
 /*
@@ -245,4 +244,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__IO_64_H */
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 14f82bb..be62847 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
+#ifndef ASM_X86__IO_APIC_H
+#define ASM_X86__IO_APIC_H
 
 #include <linux/types.h>
 #include <asm/mpspec.h>
@@ -189,4 +189,4 @@
 static inline void ioapic_init_mappings(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__IO_APIC_H */
diff --git a/include/asm-x86/ioctls.h b/include/asm-x86/ioctls.h
index c0c338b..3366035 100644
--- a/include/asm-x86/ioctls.h
+++ b/include/asm-x86/ioctls.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IOCTLS_H
-#define _ASM_X86_IOCTLS_H
+#ifndef ASM_X86__IOCTLS_H
+#define ASM_X86__IOCTLS_H
 
 #include <asm/ioctl.h>
 
@@ -85,4 +85,4 @@
 
 #define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
 
-#endif
+#endif /* ASM_X86__IOCTLS_H */
diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index 5f888cc..e86f441 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_IOMMU_H
-#define _ASM_X8664_IOMMU_H 1
+#ifndef ASM_X86__IOMMU_H
+#define ASM_X86__IOMMU_H
 
 extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
@@ -42,4 +42,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__IOMMU_H */
diff --git a/include/asm-x86/ipcbuf.h b/include/asm-x86/ipcbuf.h
index ee678fd..910304f 100644
--- a/include/asm-x86/ipcbuf.h
+++ b/include/asm-x86/ipcbuf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IPCBUF_H
-#define _ASM_X86_IPCBUF_H
+#ifndef ASM_X86__IPCBUF_H
+#define ASM_X86__IPCBUF_H
 
 /*
  * The ipc64_perm structure for x86 architecture.
@@ -25,4 +25,4 @@
 	unsigned long		__unused2;
 };
 
-#endif /* _ASM_X86_IPCBUF_H */
+#endif /* ASM_X86__IPCBUF_H */
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index bb1c09f..c1b2267 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_IPI_H
-#define __ASM_IPI_H
+#ifndef ASM_X86__IPI_H
+#define ASM_X86__IPI_H
 
 /*
  * Copyright 2004 James Cleverdon, IBM.
@@ -129,4 +129,4 @@
 	local_irq_restore(flags);
 }
 
-#endif /* __ASM_IPI_H */
+#endif /* ASM_X86__IPI_H */
diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 1a29257..1e5f290 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_H
-#define _ASM_IRQ_H
+#ifndef ASM_X86__IRQ_H
+#define ASM_X86__IRQ_H
 /*
  *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
  *
@@ -47,4 +47,4 @@
 /* Interrupt vector management */
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
 
-#endif /* _ASM_IRQ_H */
+#endif /* ASM_X86__IRQ_H */
diff --git a/include/asm-x86/irq_regs_32.h b/include/asm-x86/irq_regs_32.h
index 3368b20..316a3b2 100644
--- a/include/asm-x86/irq_regs_32.h
+++ b/include/asm-x86/irq_regs_32.h
@@ -4,8 +4,8 @@
  *
  * Jeremy Fitzhardinge <jeremy@goop.org>
  */
-#ifndef _ASM_I386_IRQ_REGS_H
-#define _ASM_I386_IRQ_REGS_H
+#ifndef ASM_X86__IRQ_REGS_32_H
+#define ASM_X86__IRQ_REGS_32_H
 
 #include <asm/percpu.h>
 
@@ -26,4 +26,4 @@
 	return old_regs;
 }
 
-#endif /* _ASM_I386_IRQ_REGS_H */
+#endif /* ASM_X86__IRQ_REGS_32_H */
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index a48c7f2..c5d2d76 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_VECTORS_H
-#define _ASM_IRQ_VECTORS_H
+#ifndef ASM_X86__IRQ_VECTORS_H
+#define ASM_X86__IRQ_VECTORS_H
 
 #include <linux/threads.h>
 
@@ -179,4 +179,4 @@
 #define VIC_CPU_BOOT_ERRATA_CPI		(VIC_CPI_LEVEL0 + 8)
 
 
-#endif /* _ASM_IRQ_VECTORS_H */
+#endif /* ASM_X86__IRQ_VECTORS_H */
diff --git a/include/asm-x86/ist.h b/include/asm-x86/ist.h
index 6ec6cee..35a2fe9 100644
--- a/include/asm-x86/ist.h
+++ b/include/asm-x86/ist.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IST_H
-#define _ASM_IST_H
+#ifndef ASM_X86__IST_H
+#define ASM_X86__IST_H
 
 /*
  * Include file for the interface to IST BIOS
@@ -31,4 +31,4 @@
 extern struct ist_info ist_info;
 
 #endif	/* __KERNEL__ */
-#endif	/* _ASM_IST_H */
+#endif /* ASM_X86__IST_H */
diff --git a/include/asm-x86/k8.h b/include/asm-x86/k8.h
index 452e2b6..2bbaf43 100644
--- a/include/asm-x86/k8.h
+++ b/include/asm-x86/k8.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_K8_H
-#define _ASM_K8_H 1
+#ifndef ASM_X86__K8_H
+#define ASM_X86__K8_H
 
 #include <linux/pci.h>
 
@@ -12,4 +12,4 @@
 extern void k8_flush_garts(void);
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
-#endif
+#endif /* ASM_X86__K8_H */
diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 96651bb..5ec3ad3 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_KDEBUG_H
-#define _ASM_X86_KDEBUG_H
+#ifndef ASM_X86__KDEBUG_H
+#define ASM_X86__KDEBUG_H
 
 #include <linux/notifier.h>
 
@@ -35,4 +35,4 @@
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
-#endif
+#endif /* ASM_X86__KDEBUG_H */
diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h
index 4246ab7..ea09600 100644
--- a/include/asm-x86/kexec.h
+++ b/include/asm-x86/kexec.h
@@ -1,5 +1,5 @@
-#ifndef _KEXEC_H
-#define _KEXEC_H
+#ifndef ASM_X86__KEXEC_H
+#define ASM_X86__KEXEC_H
 
 #ifdef CONFIG_X86_32
 # define PA_CONTROL_PAGE	0
@@ -172,4 +172,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* _KEXEC_H */
+#endif /* ASM_X86__KEXEC_H */
diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h
index 94d63db..d283863 100644
--- a/include/asm-x86/kgdb.h
+++ b/include/asm-x86/kgdb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_KGDB_H_
-#define _ASM_KGDB_H_
+#ifndef ASM_X86__KGDB_H
+#define ASM_X86__KGDB_H
 
 /*
  * Copyright (C) 2001-2004 Amit S. Kale
@@ -76,4 +76,4 @@
 #define BREAK_INSTR_SIZE	1
 #define CACHE_FLUSH_IS_SAFE	1
 
-#endif				/* _ASM_KGDB_H_ */
+#endif /* ASM_X86__KGDB_H */
diff --git a/include/asm-x86/kmap_types.h b/include/asm-x86/kmap_types.h
index 5f41741..89f4449 100644
--- a/include/asm-x86/kmap_types.h
+++ b/include/asm-x86/kmap_types.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_KMAP_TYPES_H
-#define _ASM_X86_KMAP_TYPES_H
+#ifndef ASM_X86__KMAP_TYPES_H
+#define ASM_X86__KMAP_TYPES_H
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
 # define D(n) __KM_FENCE_##n ,
@@ -26,4 +26,4 @@
 
 #undef D
 
-#endif
+#endif /* ASM_X86__KMAP_TYPES_H */
diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h
index 54980b0..bd840786 100644
--- a/include/asm-x86/kprobes.h
+++ b/include/asm-x86/kprobes.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_KPROBES_H
-#define _ASM_KPROBES_H
+#ifndef ASM_X86__KPROBES_H
+#define ASM_X86__KPROBES_H
 /*
  *  Kernel Probes (KProbes)
  *
@@ -94,4 +94,4 @@
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
-#endif				/* _ASM_KPROBES_H */
+#endif /* ASM_X86__KPROBES_H */
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 6f18408..78e954d 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -1,5 +1,5 @@
-#ifndef __LINUX_KVM_X86_H
-#define __LINUX_KVM_X86_H
+#ifndef ASM_X86__KVM_H
+#define ASM_X86__KVM_H
 
 /*
  * KVM x86 specific structures and definitions
@@ -230,4 +230,4 @@
 #define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
 #define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
 
-#endif
+#endif /* ASM_X86__KVM_H */
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index c2e34c2..6979454 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -1,4 +1,4 @@
-#/*
+/*
  * Kernel-based Virtual Machine driver for Linux
  *
  * This header defines architecture specific interfaces, x86 version
@@ -8,8 +8,8 @@
  *
  */
 
-#ifndef ASM_KVM_HOST_H
-#define ASM_KVM_HOST_H
+#ifndef ASM_X86__KVM_HOST_H
+#define ASM_X86__KVM_HOST_H
 
 #include <linux/types.h>
 #include <linux/mm.h>
@@ -735,4 +735,4 @@
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 
-#endif
+#endif /* ASM_X86__KVM_HOST_H */
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 76f3921..30054fd 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -1,5 +1,5 @@
-#ifndef __X86_KVM_PARA_H
-#define __X86_KVM_PARA_H
+#ifndef ASM_X86__KVM_PARA_H
+#define ASM_X86__KVM_PARA_H
 
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
@@ -144,4 +144,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__KVM_PARA_H */
diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h
index 4e8c1e4..e2d9b03 100644
--- a/include/asm-x86/kvm_x86_emulate.h
+++ b/include/asm-x86/kvm_x86_emulate.h
@@ -8,8 +8,8 @@
  * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
  */
 
-#ifndef __X86_EMULATE_H__
-#define __X86_EMULATE_H__
+#ifndef ASM_X86__KVM_X86_EMULATE_H
+#define ASM_X86__KVM_X86_EMULATE_H
 
 struct x86_emulate_ctxt;
 
@@ -181,4 +181,4 @@
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
 		     struct x86_emulate_ops *ops);
 
-#endif				/* __X86_EMULATE_H__ */
+#endif /* ASM_X86__KVM_X86_EMULATE_H */
diff --git a/include/asm-x86/ldt.h b/include/asm-x86/ldt.h
index 20c5972..a522850 100644
--- a/include/asm-x86/ldt.h
+++ b/include/asm-x86/ldt.h
@@ -3,8 +3,8 @@
  *
  * Definitions of structures used with the modify_ldt system call.
  */
-#ifndef _ASM_X86_LDT_H
-#define _ASM_X86_LDT_H
+#ifndef ASM_X86__LDT_H
+#define ASM_X86__LDT_H
 
 /* Maximum number of LDT entries supported. */
 #define LDT_ENTRIES	8192
@@ -37,4 +37,4 @@
 #define MODIFY_LDT_CONTENTS_CODE	2
 
 #endif /* !__ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__LDT_H */
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index be4a724..7505e94 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -1,5 +1,5 @@
-#ifndef _X86_LGUEST_H
-#define _X86_LGUEST_H
+#ifndef ASM_X86__LGUEST_H
+#define ASM_X86__LGUEST_H
 
 #define GDT_ENTRY_LGUEST_CS	10
 #define GDT_ENTRY_LGUEST_DS	11
@@ -91,4 +91,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__LGUEST_H */
diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h
index a3241f2..8f034ba 100644
--- a/include/asm-x86/lguest_hcall.h
+++ b/include/asm-x86/lguest_hcall.h
@@ -1,6 +1,6 @@
 /* Architecture specific portion of the lguest hypercalls */
-#ifndef _X86_LGUEST_HCALL_H
-#define _X86_LGUEST_HCALL_H
+#ifndef ASM_X86__LGUEST_HCALL_H
+#define ASM_X86__LGUEST_HCALL_H
 
 #define LHCALL_FLUSH_ASYNC	0
 #define LHCALL_LGUEST_INIT	1
@@ -68,4 +68,4 @@
 };
 
 #endif /* !__ASSEMBLY__ */
-#endif	/* _I386_LGUEST_HCALL_H */
+#endif /* ASM_X86__LGUEST_HCALL_H */
diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h
index 64e444f..42d8b62 100644
--- a/include/asm-x86/linkage.h
+++ b/include/asm-x86/linkage.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
+#ifndef ASM_X86__LINKAGE_H
+#define ASM_X86__LINKAGE_H
 
 #undef notrace
 #define notrace __attribute__((no_instrument_function))
@@ -57,5 +57,5 @@
 #define __ALIGN_STR ".align 16,0x90"
 #endif
 
-#endif
+#endif /* ASM_X86__LINKAGE_H */
 
diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h
index 330a724..ae91994 100644
--- a/include/asm-x86/local.h
+++ b/include/asm-x86/local.h
@@ -1,5 +1,5 @@
-#ifndef _ARCH_LOCAL_H
-#define _ARCH_LOCAL_H
+#ifndef ASM_X86__LOCAL_H
+#define ASM_X86__LOCAL_H
 
 #include <linux/percpu.h>
 
@@ -232,4 +232,4 @@
 #define __cpu_local_add(i, l)	cpu_local_add((i), (l))
 #define __cpu_local_sub(i, l)	cpu_local_sub((i), (l))
 
-#endif /* _ARCH_LOCAL_H */
+#endif /* ASM_X86__LOCAL_H */
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h
index c3b9dc6..05362d4 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/mach-bigsmp/mach_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H
+#define ASM_X86__MACH_BIGSMP__MACH_APIC_H
 
 #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu))
 #define esr_disable (1)
@@ -141,4 +141,4 @@
 	return cpuid_apic >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */
diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h
index a58ab5a..811935d 100644
--- a/include/asm-x86/mach-bigsmp/mach_apicdef.h
+++ b/include/asm-x86/mach-bigsmp/mach_apicdef.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
+#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
 
 #define		APIC_ID_MASK		(0xFF<<24)
 
@@ -10,4 +10,4 @@
 
 #define		GET_APIC_ID(x)	get_apic_id(x)
 
-#endif
+#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/mach-bigsmp/mach_ipi.h
index 9404c53..b1b0f96 100644
--- a/include/asm-x86/mach-bigsmp/mach_ipi.h
+++ b/include/asm-x86/mach-bigsmp/mach_ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H
+#define ASM_X86__MACH_BIGSMP__MACH_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector);
 
@@ -22,4 +22,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */
diff --git a/include/asm-x86/mach-default/apm.h b/include/asm-x86/mach-default/apm.h
index 989f34c..2aa61b5 100644
--- a/include/asm-x86/mach-default/apm.h
+++ b/include/asm-x86/mach-default/apm.h
@@ -3,8 +3,8 @@
  *  Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp>
  */
 
-#ifndef _ASM_APM_H
-#define _ASM_APM_H
+#ifndef ASM_X86__MACH_DEFAULT__APM_H
+#define ASM_X86__MACH_DEFAULT__APM_H
 
 #ifdef APM_ZERO_SEGS
 #	define APM_DO_ZERO_SEGS \
@@ -70,4 +70,4 @@
 	return error;
 }
 
-#endif /* _ASM_APM_H */
+#endif /* ASM_X86__MACH_DEFAULT__APM_H */
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index f3226b9..b615f40 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_APIC_H
+#define ASM_X86__MACH_DEFAULT__MACH_APIC_H
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
@@ -138,4 +138,4 @@
 }
 
 #endif /* CONFIG_X86_LOCAL_APIC */
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */
diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h
index e4b29ba..936704f 100644
--- a/include/asm-x86/mach-default/mach_apicdef.h
+++ b/include/asm-x86/mach-default/mach_apicdef.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_APICDEF_H
+#define ASM_X86__MACH_DEFAULT__MACH_APICDEF_H
 
 #include <asm/apic.h>
 
@@ -21,4 +21,4 @@
 #define		GET_APIC_ID(x)	get_apic_id(x)
 #endif
 
-#endif
+#endif /* ASM_X86__MACH_DEFAULT__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-default/mach_ipi.h b/include/asm-x86/mach-default/mach_ipi.h
index be32336..674bc7e 100644
--- a/include/asm-x86/mach-default/mach_ipi.h
+++ b/include/asm-x86/mach-default/mach_ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_IPI_H
+#define ASM_X86__MACH_DEFAULT__MACH_IPI_H
 
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
@@ -61,4 +61,4 @@
 }
 #endif
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_IPI_H */
diff --git a/include/asm-x86/mach-default/mach_mpparse.h b/include/asm-x86/mach-default/mach_mpparse.h
index d141085..9c381f2 100644
--- a/include/asm-x86/mach-default/mach_mpparse.h
+++ b/include/asm-x86/mach-default/mach_mpparse.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H
+#define ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H
 
 static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, 
 		char *productid)
@@ -14,4 +14,4 @@
 }
 
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-default/mach_mpspec.h b/include/asm-x86/mach-default/mach_mpspec.h
index 51c9a97..d77646f 100644
--- a/include/asm-x86/mach-default/mach_mpspec.h
+++ b/include/asm-x86/mach-default/mach_mpspec.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H
+#define ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H
 
 #define MAX_IRQ_SOURCES 256
 
@@ -9,4 +9,4 @@
 #define MAX_MP_BUSSES 32
 #endif
 
-#endif /* __ASM_MACH_MPSPEC_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-default/mach_timer.h b/include/asm-x86/mach-default/mach_timer.h
index 4b76e53..990b158 100644
--- a/include/asm-x86/mach-default/mach_timer.h
+++ b/include/asm-x86/mach-default/mach_timer.h
@@ -10,8 +10,8 @@
  * directly because of the awkward 8-bit access mechanism of the 82C54
  * device.
  */
-#ifndef _MACH_TIMER_H
-#define _MACH_TIMER_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_TIMER_H
+#define ASM_X86__MACH_DEFAULT__MACH_TIMER_H
 
 #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
 #define CALIBRATE_LATCH	\
@@ -45,4 +45,4 @@
 	*count_p = count;
 }
 
-#endif /* !_MACH_TIMER_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_TIMER_H */
diff --git a/include/asm-x86/mach-default/mach_traps.h b/include/asm-x86/mach-default/mach_traps.h
index 2fe7705..de9ac3f 100644
--- a/include/asm-x86/mach-default/mach_traps.h
+++ b/include/asm-x86/mach-default/mach_traps.h
@@ -2,8 +2,8 @@
  *  Machine specific NMI handling for generic.
  *  Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp>
  */
-#ifndef _MACH_TRAPS_H
-#define _MACH_TRAPS_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_TRAPS_H
+#define ASM_X86__MACH_DEFAULT__MACH_TRAPS_H
 
 #include <asm/mc146818rtc.h>
 
@@ -36,4 +36,4 @@
 		unlock_cmos();
 }
 
-#endif /* !_MACH_TRAPS_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_TRAPS_H */
diff --git a/include/asm-x86/mach-default/mach_wakecpu.h b/include/asm-x86/mach-default/mach_wakecpu.h
index 3ebb178..361b810 100644
--- a/include/asm-x86/mach-default/mach_wakecpu.h
+++ b/include/asm-x86/mach-default/mach_wakecpu.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H
+#define ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H
 
 /* 
  * This file copes with machines that wakeup secondary CPUs by the
@@ -39,4 +39,4 @@
  #define inquire_remote_apic(apicid) {}
 #endif
 
-#endif /* __ASM_MACH_WAKECPU_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H */
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h
index 0a3fdf9..c1f6f68 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/mach-es7000/mach_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H
+#define ASM_X86__MACH_ES7000__MACH_APIC_H
 
 #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
 #define esr_disable (1)
@@ -191,4 +191,4 @@
 	return cpuid_apic >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */
diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h
index a58ab5a..a07e567 100644
--- a/include/asm-x86/mach-es7000/mach_apicdef.h
+++ b/include/asm-x86/mach-es7000/mach_apicdef.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H
+#define ASM_X86__MACH_ES7000__MACH_APICDEF_H
 
 #define		APIC_ID_MASK		(0xFF<<24)
 
@@ -10,4 +10,4 @@
 
 #define		GET_APIC_ID(x)	get_apic_id(x)
 
-#endif
+#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/mach-es7000/mach_ipi.h
index 5e61bd2..3a21240 100644
--- a/include/asm-x86/mach-es7000/mach_ipi.h
+++ b/include/asm-x86/mach-es7000/mach_ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H
+#define ASM_X86__MACH_ES7000__MACH_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector);
 
@@ -21,4 +21,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */
diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/mach-es7000/mach_mpparse.h
index ef26d35..befde24 100644
--- a/include/asm-x86/mach-es7000/mach_mpparse.h
+++ b/include/asm-x86/mach-es7000/mach_mpparse.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H
+#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H
 
 #include <linux/acpi.h>
 
@@ -26,4 +26,4 @@
 }
 #endif
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/mach-es7000/mach_wakecpu.h
index 84ff583..97c776c 100644
--- a/include/asm-x86/mach-es7000/mach_wakecpu.h
+++ b/include/asm-x86/mach-es7000/mach_wakecpu.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H
+#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H
 
 /* 
  * This file copes with machines that wakeup secondary CPUs by the
@@ -56,4 +56,4 @@
  #define inquire_remote_apic(apicid) {}
 #endif
 
-#endif /* __ASM_MACH_WAKECPU_H */
+#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */
diff --git a/include/asm-x86/mach-generic/gpio.h b/include/asm-x86/mach-generic/gpio.h
index 5305dcb..6ce0f77 100644
--- a/include/asm-x86/mach-generic/gpio.h
+++ b/include/asm-x86/mach-generic/gpio.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_GENERIC_GPIO_H
-#define __ASM_MACH_GENERIC_GPIO_H
+#ifndef ASM_X86__MACH_GENERIC__GPIO_H
+#define ASM_X86__MACH_GENERIC__GPIO_H
 
 int gpio_request(unsigned gpio, const char *label);
 void gpio_free(unsigned gpio);
@@ -12,4 +12,4 @@
 
 #include <asm-generic/gpio.h>           /* cansleep wrappers */
 
-#endif /* __ASM_MACH_GENERIC_GPIO_H */
+#endif /* ASM_X86__MACH_GENERIC__GPIO_H */
diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h
index 890ce3f..f7870e1 100644
--- a/include/asm-x86/mach-generic/irq_vectors_limits.h
+++ b/include/asm-x86/mach-generic/irq_vectors_limits.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
+#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
+#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
 
 /*
  * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
@@ -11,4 +11,4 @@
 #define NR_IRQS	224
 #define NR_IRQ_VECTORS	1024
 
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
+#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h
index 6eff343..5d010c6 100644
--- a/include/asm-x86/mach-generic/mach_apic.h
+++ b/include/asm-x86/mach-generic/mach_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_GENERIC__MACH_APIC_H
+#define ASM_X86__MACH_GENERIC__MACH_APIC_H
 
 #include <asm/genapic.h>
 
@@ -29,4 +29,4 @@
 
 extern void generic_bigsmp_probe(void);
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_GENERIC__MACH_APIC_H */
diff --git a/include/asm-x86/mach-generic/mach_apicdef.h b/include/asm-x86/mach-generic/mach_apicdef.h
index 28ed989..1657f38 100644
--- a/include/asm-x86/mach-generic/mach_apicdef.h
+++ b/include/asm-x86/mach-generic/mach_apicdef.h
@@ -1,5 +1,5 @@
-#ifndef _GENAPIC_MACH_APICDEF_H
-#define _GENAPIC_MACH_APICDEF_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_APICDEF_H
+#define ASM_X86__MACH_GENERIC__MACH_APICDEF_H
 
 #ifndef APIC_DEFINITION
 #include <asm/genapic.h>
@@ -8,4 +8,4 @@
 #define APIC_ID_MASK (genapic->apic_id_mask)
 #endif
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-generic/mach_ipi.h b/include/asm-x86/mach-generic/mach_ipi.h
index 441b0fe..f67433d 100644
--- a/include/asm-x86/mach-generic/mach_ipi.h
+++ b/include/asm-x86/mach-generic/mach_ipi.h
@@ -1,5 +1,5 @@
-#ifndef _MACH_IPI_H
-#define _MACH_IPI_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_IPI_H
+#define ASM_X86__MACH_GENERIC__MACH_IPI_H
 
 #include <asm/genapic.h>
 
@@ -7,4 +7,4 @@
 #define send_IPI_allbutself (genapic->send_IPI_allbutself)
 #define send_IPI_all (genapic->send_IPI_all)
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_IPI_H */
diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h
index 586cadb..3115564 100644
--- a/include/asm-x86/mach-generic/mach_mpparse.h
+++ b/include/asm-x86/mach-generic/mach_mpparse.h
@@ -1,5 +1,5 @@
-#ifndef _MACH_MPPARSE_H
-#define _MACH_MPPARSE_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_MPPARSE_H
+#define ASM_X86__MACH_GENERIC__MACH_MPPARSE_H
 
 
 extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
@@ -7,4 +7,4 @@
 
 extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-generic/mach_mpspec.h b/include/asm-x86/mach-generic/mach_mpspec.h
index c83c120..6061b15 100644
--- a/include/asm-x86/mach-generic/mach_mpspec.h
+++ b/include/asm-x86/mach-generic/mach_mpspec.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
+#ifndef ASM_X86__MACH_GENERIC__MACH_MPSPEC_H
+#define ASM_X86__MACH_GENERIC__MACH_MPSPEC_H
 
 #define MAX_IRQ_SOURCES 256
 
@@ -9,4 +9,4 @@
 
 extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
 				char *productid);
-#endif /* __ASM_MACH_MPSPEC_H */
+#endif /* ASM_X86__MACH_GENERIC__MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h
index d802465..7a0d39e 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/mach-numaq/mach_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H
+#define ASM_X86__MACH_NUMAQ__MACH_APIC_H
 
 #include <asm/io.h>
 #include <linux/mmzone.h>
@@ -135,4 +135,4 @@
 	return cpuid_apic >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */
diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/mach-numaq/mach_apicdef.h
index bf439d0..f870ec5 100644
--- a/include/asm-x86/mach-numaq/mach_apicdef.h
+++ b/include/asm-x86/mach-numaq/mach_apicdef.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H
+#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H
 
 
 #define APIC_ID_MASK (0xF<<24)
@@ -11,4 +11,4 @@
 
 #define         GET_APIC_ID(x)  get_apic_id(x)
 
-#endif
+#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/mach-numaq/mach_ipi.h
index c604448..1e83582 100644
--- a/include/asm-x86/mach-numaq/mach_ipi.h
+++ b/include/asm-x86/mach-numaq/mach_ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H
+#define ASM_X86__MACH_NUMAQ__MACH_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t, int vector);
 
@@ -22,4 +22,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */
diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h
index 626aef6..74ade18 100644
--- a/include/asm-x86/mach-numaq/mach_mpparse.h
+++ b/include/asm-x86/mach-numaq/mach_mpparse.h
@@ -1,7 +1,7 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
+#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
 
 extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
 				char *productid);
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/mach-numaq/mach_wakecpu.h
index 0053004..0db8cea 100644
--- a/include/asm-x86/mach-numaq/mach_wakecpu.h
+++ b/include/asm-x86/mach-numaq/mach_wakecpu.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H
+#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H
 
 /* This file copes with machines that wakeup secondary CPUs by NMIs */
 
@@ -40,4 +40,4 @@
 
 #define inquire_remote_apic(apicid) {}
 
-#endif /* __ASM_MACH_WAKECPU_H */
+#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */
diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h
index acce0b7..94b6cdf 100644
--- a/include/asm-x86/mach-rdc321x/gpio.h
+++ b/include/asm-x86/mach-rdc321x/gpio.h
@@ -1,5 +1,7 @@
-#ifndef _RDC321X_GPIO_H
-#define _RDC321X_GPIO_H
+#ifndef ASM_X86__MACH_RDC321X__GPIO_H
+#define ASM_X86__MACH_RDC321X__GPIO_H
+
+#include <linux/kernel.h>
 
 extern int rdc_gpio_get_value(unsigned gpio);
 extern void rdc_gpio_set_value(unsigned gpio, int value);
@@ -18,6 +20,7 @@
 
 static inline void gpio_free(unsigned gpio)
 {
+	might_sleep();
 	rdc_gpio_free(gpio);
 }
 
@@ -54,4 +57,4 @@
 /* For cansleep */
 #include <asm-generic/gpio.h>
 
-#endif /* _RDC321X_GPIO_H_ */
+#endif /* ASM_X86__MACH_RDC321X__GPIO_H */
diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/mach-summit/irq_vectors_limits.h
index 890ce3f..22f376a 100644
--- a/include/asm-x86/mach-summit/irq_vectors_limits.h
+++ b/include/asm-x86/mach-summit/irq_vectors_limits.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
+#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H
+#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H
 
 /*
  * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
@@ -11,4 +11,4 @@
 #define NR_IRQS	224
 #define NR_IRQ_VECTORS	1024
 
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
+#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h
index c47e2ab..7a66758 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/mach-summit/mach_apic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H
+#define ASM_X86__MACH_SUMMIT__MACH_APIC_H
 
 #include <asm/smp.h>
 
@@ -182,4 +182,4 @@
 	return hard_smp_processor_id() >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */
diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h
index a58ab5a..d4bc859 100644
--- a/include/asm-x86/mach-summit/mach_apicdef.h
+++ b/include/asm-x86/mach-summit/mach_apicdef.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
+#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
 
 #define		APIC_ID_MASK		(0xFF<<24)
 
@@ -10,4 +10,4 @@
 
 #define		GET_APIC_ID(x)	get_apic_id(x)
 
-#endif
+#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/mach-summit/mach_ipi.h
index 9404c53..a3b31c5 100644
--- a/include/asm-x86/mach-summit/mach_ipi.h
+++ b/include/asm-x86/mach-summit/mach_ipi.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H
+#define ASM_X86__MACH_SUMMIT__MACH_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector);
 
@@ -22,4 +22,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */
diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/mach-summit/mach_mpparse.h
index fdf5917..92396f2 100644
--- a/include/asm-x86/mach-summit/mach_mpparse.h
+++ b/include/asm-x86/mach-summit/mach_mpparse.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H
+#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H
 
 #include <mach_apic.h>
 #include <asm/tsc.h>
@@ -107,4 +107,4 @@
 		rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
 }
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */
diff --git a/include/asm-x86/math_emu.h b/include/asm-x86/math_emu.h
index 9bf4ae9..5768d8e 100644
--- a/include/asm-x86/math_emu.h
+++ b/include/asm-x86/math_emu.h
@@ -1,5 +1,5 @@
-#ifndef _I386_MATH_EMU_H
-#define _I386_MATH_EMU_H
+#ifndef ASM_X86__MATH_EMU_H
+#define ASM_X86__MATH_EMU_H
 
 /* This structure matches the layout of the data saved to the stack
    following a device-not-present interrupt, part of it saved
@@ -28,4 +28,4 @@
 	long ___vm86_fs;
 	long ___vm86_gs;
 };
-#endif
+#endif /* ASM_X86__MATH_EMU_H */
diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h
index daf1ccd..a995f33 100644
--- a/include/asm-x86/mc146818rtc.h
+++ b/include/asm-x86/mc146818rtc.h
@@ -1,8 +1,8 @@
 /*
  * Machine dependent access functions for RTC registers.
  */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
+#ifndef ASM_X86__MC146818RTC_H
+#define ASM_X86__MC146818RTC_H
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -101,4 +101,4 @@
 
 #define RTC_IRQ 8
 
-#endif /* _ASM_MC146818RTC_H */
+#endif /* ASM_X86__MC146818RTC_H */
diff --git a/include/asm-x86/mca.h b/include/asm-x86/mca.h
index 09adf2e..60d1ed2 100644
--- a/include/asm-x86/mca.h
+++ b/include/asm-x86/mca.h
@@ -1,8 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8 -*- */
 
 /* Platform specific MCA defines */
-#ifndef _ASM_MCA_H
-#define _ASM_MCA_H
+#ifndef ASM_X86__MCA_H
+#define ASM_X86__MCA_H
 
 /* Maximal number of MCA slots - actually, some machines have less, but
  * they all have sufficient number of POS registers to cover 8.
@@ -40,4 +40,4 @@
  */
 #define MCA_NUMADAPTERS (MCA_MAX_SLOT_NR+3)
 
-#endif
+#endif /* ASM_X86__MCA_H */
diff --git a/include/asm-x86/mca_dma.h b/include/asm-x86/mca_dma.h
index c3dca6e..49f22be 100644
--- a/include/asm-x86/mca_dma.h
+++ b/include/asm-x86/mca_dma.h
@@ -1,5 +1,5 @@
-#ifndef MCA_DMA_H
-#define MCA_DMA_H
+#ifndef ASM_X86__MCA_DMA_H
+#define ASM_X86__MCA_DMA_H
 
 #include <asm/io.h>
 #include <linux/ioport.h>
@@ -198,4 +198,4 @@
 	outb(mode, MCA_DMA_REG_EXE);
 }
 
-#endif /* MCA_DMA_H */
+#endif /* ASM_X86__MCA_DMA_H */
diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h
index 531eaa5..036133e 100644
--- a/include/asm-x86/mce.h
+++ b/include/asm-x86/mce.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MCE_H
-#define _ASM_X86_MCE_H
+#ifndef ASM_X86__MCE_H
+#define ASM_X86__MCE_H
 
 #ifdef __x86_64__
 
@@ -127,4 +127,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__MCE_H */
diff --git a/include/asm-x86/mman.h b/include/asm-x86/mman.h
index 90bc4108a..4ef28e6 100644
--- a/include/asm-x86/mman.h
+++ b/include/asm-x86/mman.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MMAN_H
-#define _ASM_X86_MMAN_H
+#ifndef ASM_X86__MMAN_H
+#define ASM_X86__MMAN_H
 
 #include <asm-generic/mman.h>
 
@@ -17,4 +17,4 @@
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#endif /* _ASM_X86_MMAN_H */
+#endif /* ASM_X86__MMAN_H */
diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
index e293ab8..fb79b1c 100644
--- a/include/asm-x86/mmconfig.h
+++ b/include/asm-x86/mmconfig.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_MMCONFIG_H
-#define _ASM_MMCONFIG_H
+#ifndef ASM_X86__MMCONFIG_H
+#define ASM_X86__MMCONFIG_H
 
 #ifdef CONFIG_PCI_MMCONFIG
 extern void __cpuinit fam10h_check_enable_mmcfg(void);
@@ -9,4 +9,4 @@
 static inline void check_enable_amd_mmconf_dmi(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__MMCONFIG_H */
diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h
index 00e8867..9d5aff1 100644
--- a/include/asm-x86/mmu.h
+++ b/include/asm-x86/mmu.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MMU_H
-#define _ASM_X86_MMU_H
+#ifndef ASM_X86__MMU_H
+#define ASM_X86__MMU_H
 
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
@@ -7,14 +7,9 @@
 /*
  * The x86 doesn't have a mmu context, but
  * we put the segment information here.
- *
- * cpu_vm_mask is used to optimize ldt flushing.
  */
 typedef struct {
 	void *ldt;
-#ifdef CONFIG_X86_64
-	rwlock_t ldtlock;
-#endif
 	int size;
 	struct mutex lock;
 	void *vdso;
@@ -28,4 +23,4 @@
 }
 #endif
 
-#endif /* _ASM_X86_MMU_H */
+#endif /* ASM_X86__MMU_H */
diff --git a/include/asm-x86/mmu_context.h b/include/asm-x86/mmu_context.h
index fac5701..8ec940b 100644
--- a/include/asm-x86/mmu_context.h
+++ b/include/asm-x86/mmu_context.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_MMU_CONTEXT_H
-#define __ASM_X86_MMU_CONTEXT_H
+#ifndef ASM_X86__MMU_CONTEXT_H
+#define ASM_X86__MMU_CONTEXT_H
 
 #include <asm/desc.h>
 #include <asm/atomic.h>
@@ -34,4 +34,4 @@
 } while (0);
 
 
-#endif /* __ASM_X86_MMU_CONTEXT_H */
+#endif /* ASM_X86__MMU_CONTEXT_H */
diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h
index 824fc57..cce6f6e 100644
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h
@@ -1,5 +1,5 @@
-#ifndef __I386_SCHED_H
-#define __I386_SCHED_H
+#ifndef ASM_X86__MMU_CONTEXT_32_H
+#define ASM_X86__MMU_CONTEXT_32_H
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
@@ -53,4 +53,4 @@
 #define deactivate_mm(tsk, mm)			\
 	asm("movl %0,%%gs": :"r" (0));
 
-#endif
+#endif /* ASM_X86__MMU_CONTEXT_32_H */
diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index c700063..2675867 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -1,5 +1,5 @@
-#ifndef __X86_64_MMU_CONTEXT_H
-#define __X86_64_MMU_CONTEXT_H
+#ifndef ASM_X86__MMU_CONTEXT_64_H
+#define ASM_X86__MMU_CONTEXT_64_H
 
 #include <asm/pda.h>
 
@@ -51,4 +51,4 @@
 	asm volatile("movl %0,%%fs"::"r"(0));	\
 } while (0)
 
-#endif
+#endif /* ASM_X86__MMU_CONTEXT_64_H */
diff --git a/include/asm-x86/mmx.h b/include/asm-x86/mmx.h
index 9408812..2e7299b 100644
--- a/include/asm-x86/mmx.h
+++ b/include/asm-x86/mmx.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_MMX_H
-#define _ASM_MMX_H
+#ifndef ASM_X86__MMX_H
+#define ASM_X86__MMX_H
 
 /*
  *	MMX 3Dnow! helper operations
@@ -11,4 +11,4 @@
 extern void mmx_clear_page(void *page);
 extern void mmx_copy_page(void *to, void *from);
 
-#endif
+#endif /* ASM_X86__MMX_H */
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index 5862e64..121b65d 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -3,8 +3,8 @@
  *
  */
 
-#ifndef _ASM_MMZONE_H_
-#define _ASM_MMZONE_H_
+#ifndef ASM_X86__MMZONE_32_H
+#define ASM_X86__MMZONE_32_H
 
 #include <asm/smp.h>
 
@@ -131,4 +131,4 @@
 })
 #endif /* CONFIG_NEED_MULTIPLE_NODES */
 
-#endif /* _ASM_MMZONE_H_ */
+#endif /* ASM_X86__MMZONE_32_H */
diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index 594bd0d..626b03a 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -1,8 +1,8 @@
 /* K8 NUMA support */
 /* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
 /* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */
-#ifndef _ASM_X86_64_MMZONE_H
-#define _ASM_X86_64_MMZONE_H 1
+#ifndef ASM_X86__MMZONE_64_H
+#define ASM_X86__MMZONE_64_H
 
 
 #ifdef CONFIG_NUMA
@@ -49,4 +49,4 @@
 #endif
 
 #endif
-#endif
+#endif /* ASM_X86__MMZONE_64_H */
diff --git a/include/asm-x86/module.h b/include/asm-x86/module.h
index bfedb24..48dc3e0 100644
--- a/include/asm-x86/module.h
+++ b/include/asm-x86/module.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_MODULE_H
-#define _ASM_MODULE_H
+#ifndef ASM_X86__MODULE_H
+#define ASM_X86__MODULE_H
 
 /* x86_32/64 are simple */
 struct mod_arch_specific {};
@@ -79,4 +79,4 @@
 # define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
 #endif
 
-#endif /* _ASM_MODULE_H */
+#endif /* ASM_X86__MODULE_H */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index b6995e5..118da36 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -1,5 +1,5 @@
-#ifndef _AM_X86_MPSPEC_H
-#define _AM_X86_MPSPEC_H
+#ifndef ASM_X86__MPSPEC_H
+#define ASM_X86__MPSPEC_H
 
 #include <linux/init.h>
 
@@ -141,4 +141,4 @@
 
 extern physid_mask_t phys_cpu_present_map;
 
-#endif
+#endif /* ASM_X86__MPSPEC_H */
diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h
index 38d1e73..79166b0 100644
--- a/include/asm-x86/mpspec_def.h
+++ b/include/asm-x86/mpspec_def.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MPSPEC_DEF_H
-#define __ASM_MPSPEC_DEF_H
+#ifndef ASM_X86__MPSPEC_DEF_H
+#define ASM_X86__MPSPEC_DEF_H
 
 /*
  * Structure definitions for SMP machines following the
@@ -177,4 +177,4 @@
 	MP_BUS_PCI,
 	MP_BUS_MCA,
 };
-#endif
+#endif /* ASM_X86__MPSPEC_DEF_H */
diff --git a/include/asm-x86/msgbuf.h b/include/asm-x86/msgbuf.h
index 7e4e948..1b538c9 100644
--- a/include/asm-x86/msgbuf.h
+++ b/include/asm-x86/msgbuf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MSGBUF_H
-#define _ASM_X86_MSGBUF_H
+#ifndef ASM_X86__MSGBUF_H
+#define ASM_X86__MSGBUF_H
 
 /*
  * The msqid64_ds structure for i386 architecture.
@@ -36,4 +36,4 @@
 	unsigned long  __unused5;
 };
 
-#endif /* _ASM_X86_MSGBUF_H */
+#endif /* ASM_X86__MSGBUF_H */
diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 296f29c..3139666 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h
@@ -1,5 +1,5 @@
-#ifndef ASM_MSIDEF_H
-#define ASM_MSIDEF_H
+#ifndef ASM_X86__MSIDEF_H
+#define ASM_X86__MSIDEF_H
 
 /*
  * Constants for Intel APIC based MSI messages.
@@ -48,4 +48,4 @@
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
 					 MSI_ADDR_DEST_ID_MASK)
 
-#endif /* ASM_MSIDEF_H */
+#endif /* ASM_X86__MSIDEF_H */
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 44bce77..3052f05 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_MSR_INDEX_H
-#define __ASM_MSR_INDEX_H
+#ifndef ASM_X86__MSR_INDEX_H
+#define ASM_X86__MSR_INDEX_H
 
 /* CPU model specific register (MSR) numbers */
 
@@ -310,4 +310,4 @@
 /* Geode defined MSRs */
 #define MSR_GEODE_BUSCONT_CONF0		0x00001900
 
-#endif /* __ASM_MSR_INDEX_H */
+#endif /* ASM_X86__MSR_INDEX_H */
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 2362cfd..530af1f 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_MSR_H_
-#define __ASM_X86_MSR_H_
+#ifndef ASM_X86__MSR_H
+#define ASM_X86__MSR_H
 
 #include <asm/msr-index.h>
 
@@ -63,6 +63,22 @@
 	return EAX_EDX_VAL(val, low, high);
 }
 
+static inline unsigned long long native_read_msr_amd_safe(unsigned int msr,
+						      int *err)
+{
+	DECLARE_ARGS(val, low, high);
+
+	asm volatile("2: rdmsr ; xor %0,%0\n"
+		     "1:\n\t"
+		     ".section .fixup,\"ax\"\n\t"
+		     "3:  mov %3,%0 ; jmp 1b\n\t"
+		     ".previous\n\t"
+		     _ASM_EXTABLE(2b, 3b)
+		     : "=r" (*err), EAX_EDX_RET(val, low, high)
+		     : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT));
+	return EAX_EDX_VAL(val, low, high);
+}
+
 static inline void native_write_msr(unsigned int msr,
 				    unsigned low, unsigned high)
 {
@@ -158,6 +174,13 @@
 	*p = native_read_msr_safe(msr, &err);
 	return err;
 }
+static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+{
+	int err;
+
+	*p = native_read_msr_amd_safe(msr, &err);
+	return err;
+}
 
 #define rdtscl(low)						\
 	((low) = (u32)native_read_tsc())
@@ -221,4 +244,4 @@
 #endif /* __KERNEL__ */
 
 
-#endif
+#endif /* ASM_X86__MSR_H */
diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h
index a69a01a..23a7f83 100644
--- a/include/asm-x86/mtrr.h
+++ b/include/asm-x86/mtrr.h
@@ -20,8 +20,8 @@
     The postal address is:
       Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
 */
-#ifndef _ASM_X86_MTRR_H
-#define _ASM_X86_MTRR_H
+#ifndef ASM_X86__MTRR_H
+#define ASM_X86__MTRR_H
 
 #include <linux/ioctl.h>
 #include <linux/errno.h>
@@ -170,4 +170,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif  /*  _ASM_X86_MTRR_H  */
+#endif /* ASM_X86__MTRR_H */
diff --git a/include/asm-x86/mutex_32.h b/include/asm-x86/mutex_32.h
index 73e928e..25c16d8 100644
--- a/include/asm-x86/mutex_32.h
+++ b/include/asm-x86/mutex_32.h
@@ -6,8 +6,8 @@
  *
  *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
+#ifndef ASM_X86__MUTEX_32_H
+#define ASM_X86__MUTEX_32_H
 
 #include <asm/alternative.h>
 
@@ -122,4 +122,4 @@
 #endif
 }
 
-#endif
+#endif /* ASM_X86__MUTEX_32_H */
diff --git a/include/asm-x86/mutex_64.h b/include/asm-x86/mutex_64.h
index f3fae9b..918ba21 100644
--- a/include/asm-x86/mutex_64.h
+++ b/include/asm-x86/mutex_64.h
@@ -6,8 +6,8 @@
  *
  *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
+#ifndef ASM_X86__MUTEX_64_H
+#define ASM_X86__MUTEX_64_H
 
 /**
  * __mutex_fastpath_lock - decrement and call function if negative
@@ -97,4 +97,4 @@
 		return 0;
 }
 
-#endif
+#endif /* ASM_X86__MUTEX_64_H */
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 21f8d02..d5e715f 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_NMI_H_
-#define _ASM_X86_NMI_H_
+#ifndef ASM_X86__NMI_H
+#define ASM_X86__NMI_H
 
 #include <linux/pm.h>
 #include <asm/irq.h>
@@ -34,6 +34,7 @@
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
 extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
+extern void cpu_nmi_set_wd_enabled(void);
 
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
@@ -81,4 +82,4 @@
 void stop_nmi(void);
 void restart_nmi(void);
 
-#endif
+#endif /* ASM_X86__NMI_H */
diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
index ad0bedd..ae74272 100644
--- a/include/asm-x86/nops.h
+++ b/include/asm-x86/nops.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_NOPS_H
-#define _ASM_NOPS_H 1
+#ifndef ASM_X86__NOPS_H
+#define ASM_X86__NOPS_H
 
 /* Define nops for use with alternative() */
 
@@ -115,4 +115,4 @@
 
 #define ASM_NOP_MAX 8
 
-#endif
+#endif /* ASM_X86__NOPS_H */
diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index 220d7b7..44cb078 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_32_NUMA_H
-#define _ASM_X86_32_NUMA_H 1
+#ifndef ASM_X86__NUMA_32_H
+#define ASM_X86__NUMA_32_H
 
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
@@ -8,4 +8,4 @@
 extern void set_highmem_pages_init(void);
 #endif
 
-#endif /* _ASM_X86_32_NUMA_H */
+#endif /* ASM_X86__NUMA_32_H */
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 3830094..15c9903 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_NUMA_H
-#define _ASM_X8664_NUMA_H 1
+#ifndef ASM_X86__NUMA_64_H
+#define ASM_X86__NUMA_64_H
 
 #include <linux/nodemask.h>
 #include <asm/apicdef.h>
@@ -40,4 +40,4 @@
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif
 
-#endif
+#endif /* ASM_X86__NUMA_64_H */
diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h
index 34b92d5..124bf7d 100644
--- a/include/asm-x86/numaq.h
+++ b/include/asm-x86/numaq.h
@@ -23,8 +23,8 @@
  * Send feedback to <gone@us.ibm.com>
  */
 
-#ifndef NUMAQ_H
-#define NUMAQ_H
+#ifndef ASM_X86__NUMAQ_H
+#define ASM_X86__NUMAQ_H
 
 #ifdef CONFIG_X86_NUMAQ
 
@@ -165,5 +165,5 @@
 	return 0;
 }
 #endif /* CONFIG_X86_NUMAQ */
-#endif /* NUMAQ_H */
+#endif /* ASM_X86__NUMAQ_H */
 
diff --git a/include/asm-x86/olpc.h b/include/asm-x86/olpc.h
index 97d4713..d7328b1 100644
--- a/include/asm-x86/olpc.h
+++ b/include/asm-x86/olpc.h
@@ -1,7 +1,7 @@
 /* OLPC machine specific definitions */
 
-#ifndef ASM_OLPC_H_
-#define ASM_OLPC_H_
+#ifndef ASM_X86__OLPC_H
+#define ASM_X86__OLPC_H
 
 #include <asm/geode.h>
 
@@ -129,4 +129,4 @@
 #define OLPC_GPIO_LID		geode_gpio(26)
 #define OLPC_GPIO_ECSCI		geode_gpio(27)
 
-#endif
+#endif /* ASM_X86__OLPC_H */
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 4998211..79544e6 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PAGE_H
-#define _ASM_X86_PAGE_H
+#ifndef ASM_X86__PAGE_H
+#define ASM_X86__PAGE_H
 
 #include <linux/const.h>
 
@@ -199,4 +199,4 @@
 #define __HAVE_ARCH_GATE_AREA 1
 
 #endif	/* __KERNEL__ */
-#endif	/* _ASM_X86_PAGE_H */
+#endif /* ASM_X86__PAGE_H */
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index ab85287..72f7305 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PAGE_32_H
-#define _ASM_X86_PAGE_32_H
+#ifndef ASM_X86__PAGE_32_H
+#define ASM_X86__PAGE_32_H
 
 /*
  * This handles the memory map.
@@ -89,13 +89,11 @@
 extern unsigned int __VMALLOC_RESERVE;
 extern int sysctl_legacy_va_layout;
 
-#define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET - __VMALLOC_RESERVE)
-
 extern void find_low_pfn_range(void);
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 extern void initmem_init(unsigned long, unsigned long);
+extern void free_initmem(void);
 extern void setup_bootmem_allocator(void);
 
 
@@ -126,4 +124,4 @@
 #endif	/* CONFIG_X86_3DNOW */
 #endif	/* !__ASSEMBLY__ */
 
-#endif /* _ASM_X86_PAGE_32_H */
+#endif /* ASM_X86__PAGE_32_H */
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index c6916c8..5e64acf 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_PAGE_H
-#define _X86_64_PAGE_H
+#ifndef ASM_X86__PAGE_64_H
+#define ASM_X86__PAGE_64_H
 
 #define PAGETABLE_LEVELS	4
 
@@ -91,6 +91,7 @@
 					 unsigned long end);
 
 extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
 
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
@@ -102,4 +103,4 @@
 #endif
 
 
-#endif /* _X86_64_PAGE_H */
+#endif /* ASM_X86__PAGE_64_H */
diff --git a/include/asm-x86/param.h b/include/asm-x86/param.h
index 6f0d042..0009cfb 100644
--- a/include/asm-x86/param.h
+++ b/include/asm-x86/param.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PARAM_H
-#define _ASM_X86_PARAM_H
+#ifndef ASM_X86__PARAM_H
+#define ASM_X86__PARAM_H
 
 #ifdef __KERNEL__
 # define HZ		CONFIG_HZ	/* Internal kernel timer frequency */
@@ -19,4 +19,4 @@
 
 #define MAXHOSTNAMELEN	64	/* max length of hostname */
 
-#endif /* _ASM_X86_PARAM_H */
+#endif /* ASM_X86__PARAM_H */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index fbbde93f..891971f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_PARAVIRT_H
-#define __ASM_PARAVIRT_H
+#ifndef ASM_X86__PARAVIRT_H
+#define ASM_X86__PARAVIRT_H
 /* Various instructions on x86 need to be replaced for
  * para-virtualization: those hooks are defined here. */
 
@@ -137,6 +137,7 @@
 
 	/* MSR, PMC and TSR operations.
 	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
+	u64 (*read_msr_amd)(unsigned int msr, int *err);
 	u64 (*read_msr)(unsigned int msr, int *err);
 	int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
 
@@ -257,13 +258,13 @@
 	 * Hooks for allocating/releasing pagetable pages when they're
 	 * attached to a pagetable
 	 */
-	void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
-	void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
-	void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
-	void (*alloc_pud)(struct mm_struct *mm, u32 pfn);
-	void (*release_pte)(u32 pfn);
-	void (*release_pmd)(u32 pfn);
-	void (*release_pud)(u32 pfn);
+	void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
+	void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
+	void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
+	void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
+	void (*release_pte)(unsigned long pfn);
+	void (*release_pmd)(unsigned long pfn);
+	void (*release_pud)(unsigned long pfn);
 
 	/* Pagetable manipulation functions */
 	void (*set_pte)(pte_t *ptep, pte_t pteval);
@@ -726,6 +727,10 @@
 {
 	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
 }
+static inline u64 paravirt_read_msr_amd(unsigned msr, int *err)
+{
+	return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err);
+}
 static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
 {
 	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
@@ -771,6 +776,13 @@
 	*p = paravirt_read_msr(msr, &err);
 	return err;
 }
+static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+{
+	int err;
+
+	*p = paravirt_read_msr_amd(msr, &err);
+	return err;
+}
 
 static inline u64 paravirt_read_tsc(void)
 {
@@ -993,35 +1005,35 @@
 	PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
 }
 
-static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
+static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
 {
 	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
 }
-static inline void paravirt_release_pte(unsigned pfn)
+static inline void paravirt_release_pte(unsigned long pfn)
 {
 	PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
 }
 
-static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn)
+static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
 {
 	PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
 }
 
-static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn,
-					    unsigned start, unsigned count)
+static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
+					    unsigned long start, unsigned long count)
 {
 	PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
 }
-static inline void paravirt_release_pmd(unsigned pfn)
+static inline void paravirt_release_pmd(unsigned long pfn)
 {
 	PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
 }
 
-static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn)
+static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
 }
-static inline void paravirt_release_pud(unsigned pfn)
+static inline void paravirt_release_pud(unsigned long pfn)
 {
 	PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
 }
@@ -1634,4 +1646,4 @@
 
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PARAVIRT */
-#endif	/* __ASM_PARAVIRT_H */
+#endif /* ASM_X86__PARAVIRT_H */
diff --git a/include/asm-x86/parport.h b/include/asm-x86/parport.h
index 3c4ffeb..2e3dda4 100644
--- a/include/asm-x86/parport.h
+++ b/include/asm-x86/parport.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PARPORT_H
-#define _ASM_X86_PARPORT_H
+#ifndef ASM_X86__PARPORT_H
+#define ASM_X86__PARPORT_H
 
 static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
 static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
@@ -7,4 +7,4 @@
 	return parport_pc_find_isa_ports(autoirq, autodma);
 }
 
-#endif /* _ASM_X86_PARPORT_H */
+#endif /* ASM_X86__PARPORT_H */
diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h
index 7edc473..482c3e3 100644
--- a/include/asm-x86/pat.h
+++ b/include/asm-x86/pat.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_PAT_H
-#define _ASM_PAT_H
+#ifndef ASM_X86__PAT_H
+#define ASM_X86__PAT_H
 
 #include <linux/types.h>
 
@@ -19,4 +19,4 @@
 
 extern void pat_disable(char *reason);
 
-#endif
+#endif /* ASM_X86__PAT_H */
diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h
index 80c775d..da42be0 100644
--- a/include/asm-x86/pci-direct.h
+++ b/include/asm-x86/pci-direct.h
@@ -1,5 +1,5 @@
-#ifndef ASM_PCI_DIRECT_H
-#define ASM_PCI_DIRECT_H 1
+#ifndef ASM_X86__PCI_DIRECT_H
+#define ASM_X86__PCI_DIRECT_H
 
 #include <linux/types.h>
 
@@ -18,4 +18,4 @@
 extern unsigned int pci_early_dump_regs;
 extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
 extern void early_dump_pci_devices(void);
-#endif
+#endif /* ASM_X86__PCI_DIRECT_H */
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 2db14cf..6025831 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -1,5 +1,5 @@
-#ifndef __x86_PCI_H
-#define __x86_PCI_H
+#ifndef ASM_X86__PCI_H
+#define ASM_X86__PCI_H
 
 #include <linux/mm.h> /* for struct page */
 #include <linux/types.h>
@@ -111,4 +111,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__PCI_H */
diff --git a/include/asm-x86/pci_32.h b/include/asm-x86/pci_32.h
index a50d468..3f22882 100644
--- a/include/asm-x86/pci_32.h
+++ b/include/asm-x86/pci_32.h
@@ -1,5 +1,5 @@
-#ifndef __i386_PCI_H
-#define __i386_PCI_H
+#ifndef ASM_X86__PCI_32_H
+#define ASM_X86__PCI_32_H
 
 
 #ifdef __KERNEL__
@@ -31,4 +31,4 @@
 #endif /* __KERNEL__ */
 
 
-#endif /* __i386_PCI_H */
+#endif /* ASM_X86__PCI_32_H */
diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h
index f330234..f72e12d 100644
--- a/include/asm-x86/pci_64.h
+++ b/include/asm-x86/pci_64.h
@@ -1,5 +1,5 @@
-#ifndef __x8664_PCI_H
-#define __x8664_PCI_H
+#ifndef ASM_X86__PCI_64_H
+#define ASM_X86__PCI_64_H
 
 #ifdef __KERNEL__
 
@@ -63,4 +63,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* __x8664_PCI_H */
+#endif /* ASM_X86__PCI_64_H */
diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index b34e9a7..80860af 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -1,5 +1,5 @@
-#ifndef X86_64_PDA_H
-#define X86_64_PDA_H
+#ifndef ASM_X86__PDA_H
+#define ASM_X86__PDA_H
 
 #ifndef __ASSEMBLY__
 #include <linux/stddef.h>
@@ -134,4 +134,4 @@
 
 #define PDA_STACKOFFSET (5*8)
 
-#endif
+#endif /* ASM_X86__PDA_H */
diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index f643a3a9..e10a1d0 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PERCPU_H_
-#define _ASM_X86_PERCPU_H_
+#ifndef ASM_X86__PERCPU_H
+#define ASM_X86__PERCPU_H
 
 #ifdef CONFIG_X86_64
 #include <linux/compiler.h>
@@ -215,4 +215,4 @@
 
 #endif	/* !CONFIG_SMP */
 
-#endif /* _ASM_X86_PERCPU_H_ */
+#endif /* ASM_X86__PERCPU_H */
diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h
index d63ea43..3cd23ad 100644
--- a/include/asm-x86/pgalloc.h
+++ b/include/asm-x86/pgalloc.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PGALLOC_H
-#define _ASM_X86_PGALLOC_H
+#ifndef ASM_X86__PGALLOC_H
+#define ASM_X86__PGALLOC_H
 
 #include <linux/threads.h>
 #include <linux/mm.h>		/* for struct page */
@@ -111,4 +111,4 @@
 #endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
 
-#endif	/* _ASM_X86_PGALLOC_H */
+#endif /* ASM_X86__PGALLOC_H */
diff --git a/include/asm-x86/pgtable-2level-defs.h b/include/asm-x86/pgtable-2level-defs.h
index 0f71c9f..7ec48f4 100644
--- a/include/asm-x86/pgtable-2level-defs.h
+++ b/include/asm-x86/pgtable-2level-defs.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_2LEVEL_DEFS_H
-#define _I386_PGTABLE_2LEVEL_DEFS_H
+#ifndef ASM_X86__PGTABLE_2LEVEL_DEFS_H
+#define ASM_X86__PGTABLE_2LEVEL_DEFS_H
 
 #define SHARED_KERNEL_PMD	0
 
@@ -17,4 +17,4 @@
 
 #define PTRS_PER_PTE	1024
 
-#endif /* _I386_PGTABLE_2LEVEL_DEFS_H */
+#endif /* ASM_X86__PGTABLE_2LEVEL_DEFS_H */
diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index 46bc52c..8176208 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_2LEVEL_H
-#define _I386_PGTABLE_2LEVEL_H
+#ifndef ASM_X86__PGTABLE_2LEVEL_H
+#define ASM_X86__PGTABLE_2LEVEL_H
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
@@ -53,9 +53,7 @@
 #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
 #endif
 
-#define pte_page(x)		pfn_to_page(pte_pfn(x))
 #define pte_none(x)		(!(x).pte_low)
-#define pte_pfn(x)		(pte_val(x) >> PAGE_SHIFT)
 
 /*
  * Bits 0, 6 and 7 are taken, split up the 29 bits of offset
@@ -78,4 +76,4 @@
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
-#endif /* _I386_PGTABLE_2LEVEL_H */
+#endif /* ASM_X86__PGTABLE_2LEVEL_H */
diff --git a/include/asm-x86/pgtable-3level-defs.h b/include/asm-x86/pgtable-3level-defs.h
index 448ac95..c05fe6f 100644
--- a/include/asm-x86/pgtable-3level-defs.h
+++ b/include/asm-x86/pgtable-3level-defs.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_3LEVEL_DEFS_H
-#define _I386_PGTABLE_3LEVEL_DEFS_H
+#ifndef ASM_X86__PGTABLE_3LEVEL_DEFS_H
+#define ASM_X86__PGTABLE_3LEVEL_DEFS_H
 
 #ifdef CONFIG_PARAVIRT
 #define SHARED_KERNEL_PMD	(pv_info.shared_kernel_pmd)
@@ -25,4 +25,4 @@
  */
 #define PTRS_PER_PTE	512
 
-#endif /* _I386_PGTABLE_3LEVEL_DEFS_H */
+#endif /* ASM_X86__PGTABLE_3LEVEL_DEFS_H */
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 105057f..75f4276 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_3LEVEL_H
-#define _I386_PGTABLE_3LEVEL_H
+#ifndef ASM_X86__PGTABLE_3LEVEL_H
+#define ASM_X86__PGTABLE_3LEVEL_H
 
 /*
  * Intel Physical Address Extension (PAE) Mode - three-level page
@@ -151,18 +151,11 @@
 	return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
 }
 
-#define pte_page(x)	pfn_to_page(pte_pfn(x))
-
 static inline int pte_none(pte_t pte)
 {
 	return !pte.pte_low && !pte.pte_high;
 }
 
-static inline unsigned long pte_pfn(pte_t pte)
-{
-	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
-}
-
 /*
  * Bits 0, 6 and 7 are taken in the low part of the pte,
  * put the 32 bits of offset into the high part.
@@ -179,4 +172,4 @@
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
-#endif /* _I386_PGTABLE_3LEVEL_H */
+#endif /* ASM_X86__PGTABLE_3LEVEL_H */
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 04caa2f..888add7 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PGTABLE_H
-#define _ASM_X86_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_H
+#define ASM_X86__PGTABLE_H
 
 #define FIRST_USER_ADDRESS	0
 
@@ -186,6 +186,13 @@
 	return pte_val(pte) & _PAGE_SPECIAL;
 }
 
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
+#define pte_page(pte)	pfn_to_page(pte_pfn(pte))
+
 static inline int pmd_large(pmd_t pte)
 {
 	return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -313,6 +320,8 @@
 static inline void native_pagetable_setup_done(pgd_t *base) {}
 #endif
 
+extern int arch_report_meminfo(char *page);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
@@ -521,4 +530,4 @@
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
-#endif	/* _ASM_X86_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_H */
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 5c3b265..8de702d 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_H
-#define _I386_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_32_H
+#define ASM_X86__PGTABLE_32_H
 
 
 /*
@@ -31,6 +31,7 @@
 static inline void check_pgt_cache(void) { }
 void paging_init(void);
 
+extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
 
 /*
  * The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -56,8 +57,7 @@
  * area for the same reason. ;)
  */
 #define VMALLOC_OFFSET	(8 * 1024 * 1024)
-#define VMALLOC_START	(((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \
-			 & ~(VMALLOC_OFFSET - 1))
+#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
 #ifdef CONFIG_X86_PAE
 #define LAST_PKMAP 512
 #else
@@ -73,6 +73,8 @@
 # define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
 #endif
 
+#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
 /*
  * Define this if things work differently on an i386 and an i486:
  * it will (on an i486) warn about kernel memory accesses that are
@@ -186,4 +188,4 @@
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)	\
 	remap_pfn_range(vma, vaddr, pfn, size, prot)
 
-#endif /* _I386_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_32_H */
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 549144d..fde9770 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_PGTABLE_H
-#define _X86_64_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_64_H
+#define ASM_X86__PGTABLE_64_H
 
 #include <linux/const.h>
 #ifndef __ASSEMBLY__
@@ -175,8 +175,6 @@
 #define pte_present(x)	(pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE))
 
 #define pages_to_mb(x)	((x) >> (20 - PAGE_SHIFT))   /* FIXME: is this right? */
-#define pte_page(x)	pfn_to_page(pte_pfn((x)))
-#define pte_pfn(x)	((pte_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
 /*
  * Macro to mark a page protection value as "uncacheable".
@@ -284,4 +282,4 @@
 #define __HAVE_ARCH_PTE_SAME
 #endif /* !__ASSEMBLY__ */
 
-#endif /* _X86_64_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_64_H */
diff --git a/include/asm-x86/posix_types_32.h b/include/asm-x86/posix_types_32.h
index b031efd..70cf2bb 100644
--- a/include/asm-x86/posix_types_32.h
+++ b/include/asm-x86/posix_types_32.h
@@ -1,5 +1,5 @@
-#ifndef __ARCH_I386_POSIX_TYPES_H
-#define __ARCH_I386_POSIX_TYPES_H
+#ifndef ASM_X86__POSIX_TYPES_32_H
+#define ASM_X86__POSIX_TYPES_32_H
 
 /*
  * This file is generally used by user-level software, so you need to
@@ -82,4 +82,4 @@
 
 #endif /* defined(__KERNEL__) */
 
-#endif
+#endif /* ASM_X86__POSIX_TYPES_32_H */
diff --git a/include/asm-x86/posix_types_64.h b/include/asm-x86/posix_types_64.h
index d6624c9..388b4e7 100644
--- a/include/asm-x86/posix_types_64.h
+++ b/include/asm-x86/posix_types_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_POSIX_TYPES_H
-#define _ASM_X86_64_POSIX_TYPES_H
+#ifndef ASM_X86__POSIX_TYPES_64_H
+#define ASM_X86__POSIX_TYPES_64_H
 
 /*
  * This file is generally used by user-level software, so you need to
@@ -116,4 +116,4 @@
 
 #endif /* defined(__KERNEL__) */
 
-#endif
+#endif /* ASM_X86__POSIX_TYPES_64_H */
diff --git a/include/asm-x86/prctl.h b/include/asm-x86/prctl.h
index 52952ad..e7ae34e 100644
--- a/include/asm-x86/prctl.h
+++ b/include/asm-x86/prctl.h
@@ -1,5 +1,5 @@
-#ifndef X86_64_PRCTL_H
-#define X86_64_PRCTL_H 1
+#ifndef ASM_X86__PRCTL_H
+#define ASM_X86__PRCTL_H
 
 #define ARCH_SET_GS 0x1001
 #define ARCH_SET_FS 0x1002
@@ -7,4 +7,4 @@
 #define ARCH_GET_GS 0x1004
 
 
-#endif
+#endif /* ASM_X86__PRCTL_H */
diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index eff2ecd..5dd7977 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_I386_PROCESSOR_FLAGS_H
-#define __ASM_I386_PROCESSOR_FLAGS_H
+#ifndef ASM_X86__PROCESSOR_FLAGS_H
+#define ASM_X86__PROCESSOR_FLAGS_H
 /* Various flags defined: can be included from assembler. */
 
 /*
@@ -96,4 +96,4 @@
 #endif
 #endif
 
-#endif	/* __ASM_I386_PROCESSOR_FLAGS_H */
+#endif /* ASM_X86__PROCESSOR_FLAGS_H */
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 4df3e2f..5eaf9bf 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_PROCESSOR_H
-#define __ASM_X86_PROCESSOR_H
+#ifndef ASM_X86__PROCESSOR_H
+#define ASM_X86__PROCESSOR_H
 
 #include <asm/processor-flags.h>
 
@@ -20,6 +20,7 @@
 #include <asm/msr.h>
 #include <asm/desc_defs.h>
 #include <asm/nops.h>
+#include <asm/ds.h>
 
 #include <linux/personality.h>
 #include <linux/cpumask.h>
@@ -140,6 +141,8 @@
 #define current_cpu_data	boot_cpu_data
 #endif
 
+extern const struct seq_operations cpuinfo_op;
+
 static inline int hlt_works(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -153,6 +156,8 @@
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
+extern struct pt_regs *idle_regs(struct pt_regs *);
+
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
@@ -411,9 +416,14 @@
 	unsigned		io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
-/* Debug Store - if not 0 points to a DS Save Area configuration;
- *               goes into MSR_IA32_DS_AREA */
-	unsigned long	ds_area_msr;
+#ifdef CONFIG_X86_DS
+/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+	struct ds_context	*ds_ctx;
+#endif /* CONFIG_X86_DS */
+#ifdef CONFIG_X86_PTRACE_BTS
+/* the signal to send on a bts buffer overflow */
+	unsigned int	bts_ovfl_signal;
+#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -943,4 +953,4 @@
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
-#endif
+#endif /* ASM_X86__PROCESSOR_H */
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 3dd458c..6e89e8b 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_PROTO_H
-#define _ASM_X8664_PROTO_H 1
+#ifndef ASM_X86__PROTO_H
+#define ASM_X86__PROTO_H
 
 #include <asm/ldt.h>
 
@@ -29,4 +29,4 @@
 #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))
 #define round_down(x, y) ((x) & ~((y) - 1))
 
-#endif
+#endif /* ASM_X86__PROTO_H */
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 72e7b9d..4298b88 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PTRACE_ABI_H
-#define _ASM_X86_PTRACE_ABI_H
+#ifndef ASM_X86__PTRACE_ABI_H
+#define ASM_X86__PTRACE_ABI_H
 
 #ifdef __i386__
 
@@ -80,8 +80,9 @@
 
 #define PTRACE_SINGLEBLOCK	33	/* resume execution until next branch */
 
-#ifndef __ASSEMBLY__
+#ifdef CONFIG_X86_PTRACE_BTS
 
+#ifndef __ASSEMBLY__
 #include <asm/types.h>
 
 /* configuration/status structure used in PTRACE_BTS_CONFIG and
@@ -97,20 +98,20 @@
 	/* actual size of bts_struct in bytes */
 	__u32 bts_size;
 };
-#endif
+#endif /* __ASSEMBLY__ */
 
 #define PTRACE_BTS_O_TRACE	0x1 /* branch trace */
 #define PTRACE_BTS_O_SCHED	0x2 /* scheduling events w/ jiffies */
 #define PTRACE_BTS_O_SIGNAL     0x4 /* send SIG<signal> on buffer overflow
 				       instead of wrapping around */
-#define PTRACE_BTS_O_CUT_SIZE	0x8 /* cut requested size to max available
-				       instead of failing */
+#define PTRACE_BTS_O_ALLOC	0x8 /* (re)allocate buffer */
 
 #define PTRACE_BTS_CONFIG	40
 /* Configure branch trace recording.
    ADDR points to a struct ptrace_bts_config.
    DATA gives the size of that buffer.
-   A new buffer is allocated, iff the size changes.
+   A new buffer is allocated, if requested in the flags.
+   An overflow signal may only be requested for new buffers.
    Returns the number of bytes read.
 */
 #define PTRACE_BTS_STATUS	41
@@ -119,7 +120,7 @@
    Returns the number of bytes written.
 */
 #define PTRACE_BTS_SIZE		42
-/* Return the number of available BTS records.
+/* Return the number of available BTS records for draining.
    DATA and ADDR are ignored.
 */
 #define PTRACE_BTS_GET		43
@@ -139,5 +140,6 @@
    BTS records are read from oldest to newest.
    Returns number of BTS records drained.
 */
+#endif /* CONFIG_X86_PTRACE_BTS */
 
-#endif
+#endif /* ASM_X86__PTRACE_ABI_H */
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 8a71db8..d64a610 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PTRACE_H
-#define _ASM_X86_PTRACE_H
+#ifndef ASM_X86__PTRACE_H
+#define ASM_X86__PTRACE_H
 
 #include <linux/compiler.h>	/* For __user */
 #include <asm/ptrace-abi.h>
@@ -127,14 +127,48 @@
 #endif /* __KERNEL__ */
 #endif /* !__i386__ */
 
+
+#ifdef CONFIG_X86_PTRACE_BTS
+/* a branch trace record entry
+ *
+ * In order to unify the interface between various processor versions,
+ * we use the below data structure for all processors.
+ */
+enum bts_qualifier {
+	BTS_INVALID = 0,
+	BTS_BRANCH,
+	BTS_TASK_ARRIVES,
+	BTS_TASK_DEPARTS
+};
+
+struct bts_struct {
+	__u64 qualifier;
+	union {
+		/* BTS_BRANCH */
+		struct {
+			__u64 from_ip;
+			__u64 to_ip;
+		} lbr;
+		/* BTS_TASK_ARRIVES or
+		   BTS_TASK_DEPARTS */
+		__u64 jiffies;
+	} variant;
+};
+#endif /* CONFIG_X86_PTRACE_BTS */
+
 #ifdef __KERNEL__
 
-/* the DS BTS struct is used for ptrace as well */
-#include <asm/ds.h>
+#include <linux/init.h>
 
+struct cpuinfo_x86;
 struct task_struct;
 
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
 extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
+#else
+#define ptrace_bts_init_intel(config) do {} while (0)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 extern unsigned long profile_pc(struct pt_regs *regs);
 
@@ -148,6 +182,9 @@
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 #endif
 
+extern long syscall_trace_enter(struct pt_regs *);
+extern void syscall_trace_leave(struct pt_regs *);
+
 static inline unsigned long regs_return_value(struct pt_regs *regs)
 {
 	return regs->ax;
@@ -213,6 +250,11 @@
 	return regs->bp;
 }
 
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+	return regs->sp;
+}
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
@@ -239,4 +281,4 @@
 
 #endif /* !__ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__PTRACE_H */
diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h
index 6857f84..edb3b4e 100644
--- a/include/asm-x86/pvclock-abi.h
+++ b/include/asm-x86/pvclock-abi.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PVCLOCK_ABI_H_
-#define _ASM_X86_PVCLOCK_ABI_H_
+#ifndef ASM_X86__PVCLOCK_ABI_H
+#define ASM_X86__PVCLOCK_ABI_H
 #ifndef __ASSEMBLY__
 
 /*
@@ -39,4 +39,4 @@
 } __attribute__((__packed__));
 
 #endif /* __ASSEMBLY__ */
-#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
+#endif /* ASM_X86__PVCLOCK_ABI_H */
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
index 85b1bba..1a38f68 100644
--- a/include/asm-x86/pvclock.h
+++ b/include/asm-x86/pvclock.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PVCLOCK_H_
-#define _ASM_X86_PVCLOCK_H_
+#ifndef ASM_X86__PVCLOCK_H
+#define ASM_X86__PVCLOCK_H
 
 #include <linux/clocksource.h>
 #include <asm/pvclock-abi.h>
@@ -10,4 +10,4 @@
 			    struct pvclock_vcpu_time_info *vcpu,
 			    struct timespec *ts);
 
-#endif /* _ASM_X86_PVCLOCK_H_ */
+#endif /* ASM_X86__PVCLOCK_H */
diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
index 206f355..1c2f0ce 100644
--- a/include/asm-x86/reboot.h
+++ b/include/asm-x86/reboot.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_REBOOT_H
-#define _ASM_REBOOT_H
+#ifndef ASM_X86__REBOOT_H
+#define ASM_X86__REBOOT_H
 
 struct pt_regs;
 
@@ -18,4 +18,4 @@
 void native_machine_shutdown(void);
 void machine_real_restart(const unsigned char *code, int length);
 
-#endif	/* _ASM_REBOOT_H */
+#endif /* ASM_X86__REBOOT_H */
diff --git a/include/asm-x86/reboot_fixups.h b/include/asm-x86/reboot_fixups.h
index 0cb7d87..2c2987d 100644
--- a/include/asm-x86/reboot_fixups.h
+++ b/include/asm-x86/reboot_fixups.h
@@ -1,6 +1,6 @@
-#ifndef _LINUX_REBOOT_FIXUPS_H
-#define _LINUX_REBOOT_FIXUPS_H
+#ifndef ASM_X86__REBOOT_FIXUPS_H
+#define ASM_X86__REBOOT_FIXUPS_H
 
 extern void mach_reboot_fixups(void);
 
-#endif /* _LINUX_REBOOT_FIXUPS_H */
+#endif /* ASM_X86__REBOOT_FIXUPS_H */
diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h
index 5c2ff4b..a01c4e37 100644
--- a/include/asm-x86/required-features.h
+++ b/include/asm-x86/required-features.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_REQUIRED_FEATURES_H
-#define _ASM_REQUIRED_FEATURES_H 1
+#ifndef ASM_X86__REQUIRED_FEATURES_H
+#define ASM_X86__REQUIRED_FEATURES_H
 
 /* Define minimum CPUID feature set for kernel These bits are checked
    really early to actually display a visible error message before the
@@ -79,4 +79,4 @@
 #define REQUIRED_MASK6	0
 #define REQUIRED_MASK7	0
 
-#endif
+#endif /* ASM_X86__REQUIRED_FEATURES_H */
diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h
index 8d9f0b4..e39376d 100644
--- a/include/asm-x86/resume-trace.h
+++ b/include/asm-x86/resume-trace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_RESUME_TRACE_H
-#define _ASM_X86_RESUME_TRACE_H
+#ifndef ASM_X86__RESUME_TRACE_H
+#define ASM_X86__RESUME_TRACE_H
 
 #include <asm/asm.h>
 
@@ -7,7 +7,7 @@
 do {								\
 	if (pm_trace_enabled) {					\
 		const void *tracedata;				\
-		asm volatile(_ASM_MOV_UL " $1f,%0\n"		\
+		asm volatile(_ASM_MOV " $1f,%0\n"		\
 			     ".section .tracedata,\"a\"\n"	\
 			     "1:\t.word %c1\n\t"		\
 			     _ASM_PTR " %c2\n"			\
@@ -18,4 +18,4 @@
 	}							\
 } while (0)
 
-#endif
+#endif /* ASM_X86__RESUME_TRACE_H */
diff --git a/include/asm-x86/rio.h b/include/asm-x86/rio.h
index c9448bd..5e1256b 100644
--- a/include/asm-x86/rio.h
+++ b/include/asm-x86/rio.h
@@ -5,8 +5,8 @@
  * Author: Laurent Vivier <Laurent.Vivier@bull.net>
  */
 
-#ifndef __ASM_RIO_H
-#define __ASM_RIO_H
+#ifndef ASM_X86__RIO_H
+#define ASM_X86__RIO_H
 
 #define RIO_TABLE_VERSION	3
 
@@ -60,4 +60,4 @@
 	ALT_CALGARY	= 5,	/* Second Planar Calgary      */
 };
 
-#endif /* __ASM_RIO_H */
+#endif /* ASM_X86__RIO_H */
diff --git a/include/asm-x86/rwlock.h b/include/asm-x86/rwlock.h
index 6a8c0d6..48a3109 100644
--- a/include/asm-x86/rwlock.h
+++ b/include/asm-x86/rwlock.h
@@ -1,8 +1,8 @@
-#ifndef _ASM_X86_RWLOCK_H
-#define _ASM_X86_RWLOCK_H
+#ifndef ASM_X86__RWLOCK_H
+#define ASM_X86__RWLOCK_H
 
 #define RW_LOCK_BIAS		 0x01000000
 
 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */
 
-#endif /* _ASM_X86_RWLOCK_H */
+#endif /* ASM_X86__RWLOCK_H */
diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h
index 750f2a3..3ff3015 100644
--- a/include/asm-x86/rwsem.h
+++ b/include/asm-x86/rwsem.h
@@ -29,8 +29,8 @@
  * front, then they'll all be woken up, but no other readers will be.
  */
 
-#ifndef _I386_RWSEM_H
-#define _I386_RWSEM_H
+#ifndef ASM_X86__RWSEM_H
+#define ASM_X86__RWSEM_H
 
 #ifndef _LINUX_RWSEM_H
 #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
@@ -262,4 +262,4 @@
 }
 
 #endif /* __KERNEL__ */
-#endif /* _I386_RWSEM_H */
+#endif /* ASM_X86__RWSEM_H */
diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h
index c043206..ee48f88 100644
--- a/include/asm-x86/scatterlist.h
+++ b/include/asm-x86/scatterlist.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
+#ifndef ASM_X86__SCATTERLIST_H
+#define ASM_X86__SCATTERLIST_H
 
 #include <asm/types.h>
 
@@ -30,4 +30,4 @@
 # define sg_dma_len(sg)		((sg)->dma_length)
 #endif
 
-#endif
+#endif /* ASM_X86__SCATTERLIST_H */
diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h
index 36e71c5..cf9ab2d 100644
--- a/include/asm-x86/seccomp_32.h
+++ b/include/asm-x86/seccomp_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SECCOMP_H
-#define _ASM_SECCOMP_H
+#ifndef ASM_X86__SECCOMP_32_H
+#define ASM_X86__SECCOMP_32_H
 
 #include <linux/thread_info.h>
 
@@ -14,4 +14,4 @@
 #define __NR_seccomp_exit __NR_exit
 #define __NR_seccomp_sigreturn __NR_sigreturn
 
-#endif /* _ASM_SECCOMP_H */
+#endif /* ASM_X86__SECCOMP_32_H */
diff --git a/include/asm-x86/seccomp_64.h b/include/asm-x86/seccomp_64.h
index 76cfe69..03274ce 100644
--- a/include/asm-x86/seccomp_64.h
+++ b/include/asm-x86/seccomp_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SECCOMP_H
-#define _ASM_SECCOMP_H
+#ifndef ASM_X86__SECCOMP_64_H
+#define ASM_X86__SECCOMP_64_H
 
 #include <linux/thread_info.h>
 
@@ -22,4 +22,4 @@
 #define __NR_seccomp_exit_32 __NR_ia32_exit
 #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn
 
-#endif /* _ASM_SECCOMP_H */
+#endif /* ASM_X86__SECCOMP_64_H */
diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index 646452e..ea5f0a8 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SEGMENT_H_
-#define _ASM_X86_SEGMENT_H_
+#ifndef ASM_X86__SEGMENT_H
+#define ASM_X86__SEGMENT_H
 
 /* Constructor for a conventional segment GDT (or LDT) entry */
 /* This is a macro so it can be used in initializers */
@@ -212,4 +212,4 @@
 #endif
 #endif
 
-#endif
+#endif /* ASM_X86__SEGMENT_H */
diff --git a/include/asm-x86/sembuf.h b/include/asm-x86/sembuf.h
index ee50c80..81f06b7 100644
--- a/include/asm-x86/sembuf.h
+++ b/include/asm-x86/sembuf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SEMBUF_H
-#define _ASM_X86_SEMBUF_H
+#ifndef ASM_X86__SEMBUF_H
+#define ASM_X86__SEMBUF_H
 
 /*
  * The semid64_ds structure for x86 architecture.
@@ -21,4 +21,4 @@
 	unsigned long	__unused4;
 };
 
-#endif /* _ASM_X86_SEMBUF_H */
+#endif /* ASM_X86__SEMBUF_H */
diff --git a/include/asm-x86/serial.h b/include/asm-x86/serial.h
index 628c801..303660b 100644
--- a/include/asm-x86/serial.h
+++ b/include/asm-x86/serial.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SERIAL_H
-#define _ASM_X86_SERIAL_H
+#ifndef ASM_X86__SERIAL_H
+#define ASM_X86__SERIAL_H
 
 /*
  * This assumes you have a 1.8432 MHz clock for your UART.
@@ -26,4 +26,4 @@
 	{ 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS },	/* ttyS2 */	\
 	{ 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS },	/* ttyS3 */
 
-#endif /* _ASM_X86_SERIAL_H */
+#endif /* ASM_X86__SERIAL_H */
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index a07c6f1..9030cb7 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SETUP_H
-#define _ASM_X86_SETUP_H
+#ifndef ASM_X86__SETUP_H
+#define ASM_X86__SETUP_H
 
 #define COMMAND_LINE_SIZE 2048
 
@@ -41,6 +41,7 @@
 };
 
 extern struct x86_quirks *x86_quirks;
+extern unsigned long saved_video_mode;
 
 #ifndef CONFIG_PARAVIRT
 #define paravirt_post_allocator_init()	do {} while (0)
@@ -100,4 +101,4 @@
 #endif /* __ASSEMBLY__ */
 #endif  /*  __KERNEL__  */
 
-#endif /* _ASM_X86_SETUP_H */
+#endif /* ASM_X86__SETUP_H */
diff --git a/include/asm-x86/shmbuf.h b/include/asm-x86/shmbuf.h
index b51413b..f51aec2 100644
--- a/include/asm-x86/shmbuf.h
+++ b/include/asm-x86/shmbuf.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SHMBUF_H
-#define _ASM_X86_SHMBUF_H
+#ifndef ASM_X86__SHMBUF_H
+#define ASM_X86__SHMBUF_H
 
 /*
  * The shmid64_ds structure for x86 architecture.
@@ -48,4 +48,4 @@
 	unsigned long	__unused4;
 };
 
-#endif /* _ASM_X86_SHMBUF_H */
+#endif /* ASM_X86__SHMBUF_H */
diff --git a/include/asm-x86/shmparam.h b/include/asm-x86/shmparam.h
index 0880cf0..a83a1fd 100644
--- a/include/asm-x86/shmparam.h
+++ b/include/asm-x86/shmparam.h
@@ -1,6 +1,6 @@
-#ifndef _ASM_X86_SHMPARAM_H
-#define _ASM_X86_SHMPARAM_H
+#ifndef ASM_X86__SHMPARAM_H
+#define ASM_X86__SHMPARAM_H
 
 #define SHMLBA PAGE_SIZE	 /* attach addr a multiple of this */
 
-#endif /* _ASM_X86_SHMPARAM_H */
+#endif /* ASM_X86__SHMPARAM_H */
diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index 2f9c884..24879c8 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SIGCONTEXT_H
-#define _ASM_X86_SIGCONTEXT_H
+#ifndef ASM_X86__SIGCONTEXT_H
+#define ASM_X86__SIGCONTEXT_H
 
 #include <linux/compiler.h>
 #include <asm/types.h>
@@ -202,4 +202,4 @@
 
 #endif /* !__i386__ */
 
-#endif
+#endif /* ASM_X86__SIGCONTEXT_H */
diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h
index 57a9686..4e2ec73 100644
--- a/include/asm-x86/sigcontext32.h
+++ b/include/asm-x86/sigcontext32.h
@@ -1,5 +1,5 @@
-#ifndef _SIGCONTEXT32_H
-#define _SIGCONTEXT32_H 1
+#ifndef ASM_X86__SIGCONTEXT32_H
+#define ASM_X86__SIGCONTEXT32_H
 
 /* signal context for 32bit programs. */
 
@@ -68,4 +68,4 @@
        unsigned int cr2;
 };
 
-#endif
+#endif /* ASM_X86__SIGCONTEXT32_H */
diff --git a/include/asm-x86/siginfo.h b/include/asm-x86/siginfo.h
index a477bea..808bdfb 100644
--- a/include/asm-x86/siginfo.h
+++ b/include/asm-x86/siginfo.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SIGINFO_H
-#define _ASM_X86_SIGINFO_H
+#ifndef ASM_X86__SIGINFO_H
+#define ASM_X86__SIGINFO_H
 
 #ifdef __x86_64__
 # define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
@@ -7,4 +7,4 @@
 
 #include <asm-generic/siginfo.h>
 
-#endif
+#endif /* ASM_X86__SIGINFO_H */
diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h
index 6dac493..65acc82 100644
--- a/include/asm-x86/signal.h
+++ b/include/asm-x86/signal.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SIGNAL_H
-#define _ASM_X86_SIGNAL_H
+#ifndef ASM_X86__SIGNAL_H
+#define ASM_X86__SIGNAL_H
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
@@ -140,6 +140,9 @@
 struct k_sigaction {
 	struct sigaction sa;
 };
+
+extern void do_notify_resume(struct pt_regs *, void *, __u32);
+
 # else /* __KERNEL__ */
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
@@ -256,4 +259,4 @@
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__SIGNAL_H */
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 3c877f7..04f84f4 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SMP_H_
-#define _ASM_X86_SMP_H_
+#ifndef ASM_X86__SMP_H
+#define ASM_X86__SMP_H
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
 #include <linux/init.h>
@@ -34,6 +34,9 @@
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(int, cpu_number);
+#endif
 
 DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
@@ -142,7 +145,6 @@
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-DECLARE_PER_CPU(int, cpu_number);
 #define raw_smp_processor_id() (x86_read_percpu(cpu_number))
 extern int safe_smp_processor_id(void);
 
@@ -205,4 +207,4 @@
 #endif
 
 #endif /* __ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__SMP_H */
diff --git a/include/asm-x86/socket.h b/include/asm-x86/socket.h
index 80af9c4..db73274 100644
--- a/include/asm-x86/socket.h
+++ b/include/asm-x86/socket.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
+#ifndef ASM_X86__SOCKET_H
+#define ASM_X86__SOCKET_H
 
 #include <asm/sockios.h>
 
@@ -54,4 +54,4 @@
 
 #define SO_MARK			36
 
-#endif /* _ASM_SOCKET_H */
+#endif /* ASM_X86__SOCKET_H */
diff --git a/include/asm-x86/sockios.h b/include/asm-x86/sockios.h
index 49cc72b..a006704 100644
--- a/include/asm-x86/sockios.h
+++ b/include/asm-x86/sockios.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SOCKIOS_H
-#define _ASM_X86_SOCKIOS_H
+#ifndef ASM_X86__SOCKIOS_H
+#define ASM_X86__SOCKIOS_H
 
 /* Socket-level I/O control calls. */
 #define FIOSETOWN	0x8901
@@ -10,4 +10,4 @@
 #define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
 #define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
-#endif /* _ASM_X86_SOCKIOS_H */
+#endif /* ASM_X86__SOCKIOS_H */
diff --git a/include/asm-x86/sparsemem.h b/include/asm-x86/sparsemem.h
index 9bd48b0..38f8e6b 100644
--- a/include/asm-x86/sparsemem.h
+++ b/include/asm-x86/sparsemem.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SPARSEMEM_H
-#define _ASM_X86_SPARSEMEM_H
+#ifndef ASM_X86__SPARSEMEM_H
+#define ASM_X86__SPARSEMEM_H
 
 #ifdef CONFIG_SPARSEMEM
 /*
@@ -31,4 +31,4 @@
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
-#endif
+#endif /* ASM_X86__SPARSEMEM_H */
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index e39c790..93adae3 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h
@@ -1,5 +1,5 @@
-#ifndef _X86_SPINLOCK_H_
-#define _X86_SPINLOCK_H_
+#ifndef ASM_X86__SPINLOCK_H
+#define ASM_X86__SPINLOCK_H
 
 #include <asm/atomic.h>
 #include <asm/rwlock.h>
@@ -97,7 +97,7 @@
 		     "jne 1f\n\t"
 		     "movw %w0,%w1\n\t"
 		     "incb %h1\n\t"
-		     "lock ; cmpxchgw %w1,%2\n\t"
+		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
@@ -135,7 +135,7 @@
 	int inc = 0x00010000;
 	int tmp;
 
-	asm volatile("lock ; xaddl %0, %1\n"
+	asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
 		     "movzwl %w0, %2\n\t"
 		     "shrl $16, %0\n\t"
 		     "1:\t"
@@ -162,7 +162,7 @@
 		     "cmpl %0,%1\n\t"
 		     "jne 1f\n\t"
 		     "addl $0x00010000, %1\n\t"
-		     "lock ; cmpxchgl %1,%2\n\t"
+		     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
@@ -366,4 +366,4 @@
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
-#endif
+#endif /* ASM_X86__SPINLOCK_H */
diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h
index 06c071c..6aa9b56 100644
--- a/include/asm-x86/spinlock_types.h
+++ b/include/asm-x86/spinlock_types.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_SPINLOCK_TYPES_H
-#define __ASM_SPINLOCK_TYPES_H
+#ifndef ASM_X86__SPINLOCK_TYPES_H
+#define ASM_X86__SPINLOCK_TYPES_H
 
 #ifndef __LINUX_SPINLOCK_TYPES_H
 # error "please don't include this file directly"
@@ -17,4 +17,4 @@
 
 #define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
-#endif
+#endif /* ASM_X86__SPINLOCK_TYPES_H */
diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h
index 774c919..5363e4f 100644
--- a/include/asm-x86/srat.h
+++ b/include/asm-x86/srat.h
@@ -24,8 +24,8 @@
  * Send feedback to Pat Gaughen <gone@us.ibm.com>
  */
 
-#ifndef _ASM_SRAT_H_
-#define _ASM_SRAT_H_
+#ifndef ASM_X86__SRAT_H
+#define ASM_X86__SRAT_H
 
 #ifdef CONFIG_ACPI_NUMA
 extern int get_memcfg_from_srat(void);
@@ -36,4 +36,4 @@
 }
 #endif
 
-#endif /* _ASM_SRAT_H_ */
+#endif /* ASM_X86__SRAT_H */
diff --git a/include/asm-x86/stacktrace.h b/include/asm-x86/stacktrace.h
index 30f8252..f43517e 100644
--- a/include/asm-x86/stacktrace.h
+++ b/include/asm-x86/stacktrace.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_STACKTRACE_H
-#define _ASM_STACKTRACE_H 1
+#ifndef ASM_X86__STACKTRACE_H
+#define ASM_X86__STACKTRACE_H
 
 extern int kstack_depth_to_print;
 
@@ -18,4 +18,4 @@
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data);
 
-#endif
+#endif /* ASM_X86__STACKTRACE_H */
diff --git a/include/asm-x86/stat.h b/include/asm-x86/stat.h
index 5c22dcb..1e120f6 100644
--- a/include/asm-x86/stat.h
+++ b/include/asm-x86/stat.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_STAT_H
-#define _ASM_X86_STAT_H
+#ifndef ASM_X86__STAT_H
+#define ASM_X86__STAT_H
 
 #define STAT_HAVE_NSEC 1
 
@@ -111,4 +111,4 @@
 #endif
 };
 
-#endif
+#endif /* ASM_X86__STAT_H */
diff --git a/include/asm-x86/statfs.h b/include/asm-x86/statfs.h
index 7c651aa..3f005bc 100644
--- a/include/asm-x86/statfs.h
+++ b/include/asm-x86/statfs.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_STATFS_H
-#define _ASM_X86_STATFS_H
+#ifndef ASM_X86__STATFS_H
+#define ASM_X86__STATFS_H
 
 #ifdef __i386__
 #include <asm-generic/statfs.h>
@@ -60,4 +60,4 @@
 } __attribute__((packed));
 
 #endif /* !__i386__ */
-#endif
+#endif /* ASM_X86__STATFS_H */
diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h
index 193578c..487843e 100644
--- a/include/asm-x86/string_32.h
+++ b/include/asm-x86/string_32.h
@@ -1,5 +1,5 @@
-#ifndef _I386_STRING_H_
-#define _I386_STRING_H_
+#ifndef ASM_X86__STRING_32_H
+#define ASM_X86__STRING_32_H
 
 #ifdef __KERNEL__
 
@@ -323,4 +323,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__STRING_32_H */
diff --git a/include/asm-x86/string_64.h b/include/asm-x86/string_64.h
index 52b5ab3..a2add11d 100644
--- a/include/asm-x86/string_64.h
+++ b/include/asm-x86/string_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_STRING_H_
-#define _X86_64_STRING_H_
+#ifndef ASM_X86__STRING_64_H
+#define ASM_X86__STRING_64_H
 
 #ifdef __KERNEL__
 
@@ -57,4 +57,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__STRING_64_H */
diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h
index 8675c67..acb6d4d 100644
--- a/include/asm-x86/suspend_32.h
+++ b/include/asm-x86/suspend_32.h
@@ -3,8 +3,8 @@
  * Based on code
  * Copyright 2001 Patrick Mochel <mochel@osdl.org>
  */
-#ifndef __ASM_X86_32_SUSPEND_H
-#define __ASM_X86_32_SUSPEND_H
+#ifndef ASM_X86__SUSPEND_32_H
+#define ASM_X86__SUSPEND_32_H
 
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -48,4 +48,4 @@
 extern int acpi_save_state_mem(void);
 #endif
 
-#endif /* __ASM_X86_32_SUSPEND_H */
+#endif /* ASM_X86__SUSPEND_32_H */
diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h
index dc3262b..cf821dd 100644
--- a/include/asm-x86/suspend_64.h
+++ b/include/asm-x86/suspend_64.h
@@ -3,8 +3,8 @@
  * Based on code
  * Copyright 2001 Patrick Mochel <mochel@osdl.org>
  */
-#ifndef __ASM_X86_64_SUSPEND_H
-#define __ASM_X86_64_SUSPEND_H
+#ifndef ASM_X86__SUSPEND_64_H
+#define ASM_X86__SUSPEND_64_H
 
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -49,4 +49,4 @@
 extern char core_restore_code;
 extern char restore_registers;
 
-#endif /* __ASM_X86_64_SUSPEND_H */
+#endif /* ASM_X86__SUSPEND_64_H */
diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h
index 2730b35..1e20adb 100644
--- a/include/asm-x86/swiotlb.h
+++ b/include/asm-x86/swiotlb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_SWIOTLB_H
-#define _ASM_SWIOTLB_H 1
+#ifndef ASM_X86__SWIOTLB_H
+#define ASM_X86__SWIOTLB_H
 
 #include <asm/dma-mapping.h>
 
@@ -55,4 +55,4 @@
 
 static inline void dma_mark_clean(void *addr, size_t size) {}
 
-#endif /* _ASM_SWIOTLB_H */
+#endif /* ASM_X86__SWIOTLB_H */
diff --git a/include/asm-x86/sync_bitops.h b/include/asm-x86/sync_bitops.h
index b47a1d0..b689bee 100644
--- a/include/asm-x86/sync_bitops.h
+++ b/include/asm-x86/sync_bitops.h
@@ -1,5 +1,5 @@
-#ifndef _I386_SYNC_BITOPS_H
-#define _I386_SYNC_BITOPS_H
+#ifndef ASM_X86__SYNC_BITOPS_H
+#define ASM_X86__SYNC_BITOPS_H
 
 /*
  * Copyright 1992, Linus Torvalds.
@@ -127,4 +127,4 @@
 
 #undef ADDR
 
-#endif /* _I386_SYNC_BITOPS_H */
+#endif /* ASM_X86__SYNC_BITOPS_H */
diff --git a/include/asm-x86/syscall.h b/include/asm-x86/syscall.h
new file mode 100644
index 0000000..04c47dc
--- /dev/null
+++ b/include/asm-x86/syscall.h
@@ -0,0 +1,211 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <linux/sched.h>
+#include <linux/err.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	/*
+	 * We always sign-extend a -1 value being set here,
+	 * so this is always either -1L or a syscall number.
+	 */
+	return regs->orig_ax;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->ax = regs->orig_ax;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long error = regs->ax;
+#ifdef CONFIG_IA32_EMULATION
+	/*
+	 * TS_COMPAT is set for 32-bit syscall entries and then
+	 * remains set until we return to user mode.
+	 */
+	if (task_thread_info(task)->status & TS_COMPAT)
+		/*
+		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+		 * and will match correctly in comparisons.
+		 */
+		error = (long) (int) error;
+#endif
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->ax;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->ax = (long) error ?: val;
+}
+
+#ifdef CONFIG_X86_32
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(&regs->bx + i, args, n * sizeof(args[0]));
+}
+
+#else	 /* CONFIG_X86_64 */
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+# ifdef CONFIG_IA32_EMULATION
+	if (task_thread_info(task)->status & TS_COMPAT)
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			*args++ = regs->bp;
+		case 5:
+			if (!n--) break;
+			*args++ = regs->di;
+		case 4:
+			if (!n--) break;
+			*args++ = regs->si;
+		case 3:
+			if (!n--) break;
+			*args++ = regs->dx;
+		case 2:
+			if (!n--) break;
+			*args++ = regs->cx;
+		case 1:
+			if (!n--) break;
+			*args++ = regs->bx;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+			break;
+		}
+	else
+# endif
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			*args++ = regs->r9;
+		case 5:
+			if (!n--) break;
+			*args++ = regs->r8;
+		case 4:
+			if (!n--) break;
+			*args++ = regs->r10;
+		case 3:
+			if (!n--) break;
+			*args++ = regs->dx;
+		case 2:
+			if (!n--) break;
+			*args++ = regs->si;
+		case 1:
+			if (!n--) break;
+			*args++ = regs->di;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+			break;
+		}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+# ifdef CONFIG_IA32_EMULATION
+	if (task_thread_info(task)->status & TS_COMPAT)
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			regs->bp = *args++;
+		case 5:
+			if (!n--) break;
+			regs->di = *args++;
+		case 4:
+			if (!n--) break;
+			regs->si = *args++;
+		case 3:
+			if (!n--) break;
+			regs->dx = *args++;
+		case 2:
+			if (!n--) break;
+			regs->cx = *args++;
+		case 1:
+			if (!n--) break;
+			regs->bx = *args++;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+		}
+	else
+# endif
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			regs->r9 = *args++;
+		case 5:
+			if (!n--) break;
+			regs->r8 = *args++;
+		case 4:
+			if (!n--) break;
+			regs->r10 = *args++;
+		case 3:
+			if (!n--) break;
+			regs->dx = *args++;
+		case 2:
+			if (!n--) break;
+			regs->si = *args++;
+		case 1:
+			if (!n--) break;
+			regs->di = *args++;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+		}
+}
+
+#endif	/* CONFIG_X86_32 */
+
+#endif	/* _ASM_SYSCALL_H */
diff --git a/include/asm-x86/syscalls.h b/include/asm-x86/syscalls.h
new file mode 100644
index 0000000..87803da
--- /dev/null
+++ b/include/asm-x86/syscalls.h
@@ -0,0 +1,93 @@
+/*
+ * syscalls.h - Linux syscall interfaces (arch-specific)
+ *
+ * Copyright (c) 2008 Jaswinder Singh
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifndef _ASM_X86_SYSCALLS_H
+#define _ASM_X86_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+
+/* Common in X86_32 and X86_64 */
+/* kernel/ioport.c */
+asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
+
+/* X86_32 only */
+#ifdef CONFIG_X86_32
+/* kernel/process_32.c */
+asmlinkage int sys_fork(struct pt_regs);
+asmlinkage int sys_clone(struct pt_regs);
+asmlinkage int sys_vfork(struct pt_regs);
+asmlinkage int sys_execve(struct pt_regs);
+
+/* kernel/signal_32.c */
+asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
+asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
+			     struct old_sigaction __user *);
+asmlinkage int sys_sigaltstack(unsigned long);
+asmlinkage unsigned long sys_sigreturn(unsigned long);
+asmlinkage int sys_rt_sigreturn(unsigned long);
+
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned long);
+
+/* kernel/ldt.c */
+asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+
+/* kernel/sys_i386_32.c */
+asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
+			  unsigned long, unsigned long, unsigned long);
+struct mmap_arg_struct;
+asmlinkage int old_mmap(struct mmap_arg_struct __user *);
+struct sel_arg_struct;
+asmlinkage int old_select(struct sel_arg_struct __user *);
+asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
+struct old_utsname;
+asmlinkage int sys_uname(struct old_utsname __user *);
+struct oldold_utsname;
+asmlinkage int sys_olduname(struct oldold_utsname __user *);
+
+/* kernel/tls.c */
+asmlinkage int sys_set_thread_area(struct user_desc __user *);
+asmlinkage int sys_get_thread_area(struct user_desc __user *);
+
+/* kernel/vm86_32.c */
+asmlinkage int sys_vm86old(struct pt_regs);
+asmlinkage int sys_vm86(struct pt_regs);
+
+#else /* CONFIG_X86_32 */
+
+/* X86_64 only */
+/* kernel/process_64.c */
+asmlinkage long sys_fork(struct pt_regs *);
+asmlinkage long sys_clone(unsigned long, unsigned long,
+			  void __user *, void __user *,
+			  struct pt_regs *);
+asmlinkage long sys_vfork(struct pt_regs *);
+asmlinkage long sys_execve(char __user *, char __user * __user *,
+			   char __user * __user *,
+			   struct pt_regs *);
+
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
+
+/* kernel/signal_64.c */
+asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
+				struct pt_regs *);
+asmlinkage long sys_rt_sigreturn(struct pt_regs *);
+
+/* kernel/sys_x86_64.c */
+asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
+			 unsigned long, unsigned long, unsigned long);
+struct new_utsname;
+asmlinkage long sys_uname(struct new_utsname __user *);
+
+#endif /* CONFIG_X86_32 */
+#endif /* _ASM_X86_SYSCALLS_H */
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 983ce37..34505dd 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SYSTEM_H_
-#define _ASM_X86_SYSTEM_H_
+#ifndef ASM_X86__SYSTEM_H
+#define ASM_X86__SYSTEM_H
 
 #include <asm/asm.h>
 #include <asm/segment.h>
@@ -419,4 +419,4 @@
 	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
-#endif
+#endif /* ASM_X86__SYSTEM_H */
diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 97fa251..5aedb8b 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
+#ifndef ASM_X86__SYSTEM_64_H
+#define ASM_X86__SYSTEM_64_H
 
 #include <asm/segment.h>
 #include <asm/cmpxchg.h>
@@ -19,4 +19,4 @@
 
 #include <linux/irqflags.h>
 
-#endif
+#endif /* ASM_X86__SYSTEM_64_H */
diff --git a/include/asm-x86/tce.h b/include/asm-x86/tce.h
index b1a4ea0..e7932d7 100644
--- a/include/asm-x86/tce.h
+++ b/include/asm-x86/tce.h
@@ -21,8 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_64_TCE_H
-#define _ASM_X86_64_TCE_H
+#ifndef ASM_X86__TCE_H
+#define ASM_X86__TCE_H
 
 extern unsigned int specified_table_size;
 struct iommu_table;
@@ -45,4 +45,4 @@
 extern void __init free_tce_table(void *tbl);
 extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
 
-#endif /* _ASM_X86_64_TCE_H */
+#endif /* ASM_X86__TCE_H */
diff --git a/include/asm-x86/termbits.h b/include/asm-x86/termbits.h
index af1b70e..3d00dc5 100644
--- a/include/asm-x86/termbits.h
+++ b/include/asm-x86/termbits.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TERMBITS_H
-#define _ASM_X86_TERMBITS_H
+#ifndef ASM_X86__TERMBITS_H
+#define ASM_X86__TERMBITS_H
 
 #include <linux/posix_types.h>
 
@@ -195,4 +195,4 @@
 #define	TCSADRAIN	1
 #define	TCSAFLUSH	2
 
-#endif /* _ASM_X86_TERMBITS_H */
+#endif /* ASM_X86__TERMBITS_H */
diff --git a/include/asm-x86/termios.h b/include/asm-x86/termios.h
index f729563..e235db2 100644
--- a/include/asm-x86/termios.h
+++ b/include/asm-x86/termios.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TERMIOS_H
-#define _ASM_X86_TERMIOS_H
+#ifndef ASM_X86__TERMIOS_H
+#define ASM_X86__TERMIOS_H
 
 #include <asm/termbits.h>
 #include <asm/ioctls.h>
@@ -110,4 +110,4 @@
 
 #endif	/* __KERNEL__ */
 
-#endif /* _ASM_X86_TERMIOS_H */
+#endif /* ASM_X86__TERMIOS_H */
diff --git a/include/asm-x86/therm_throt.h b/include/asm-x86/therm_throt.h
index 399bf60..1c7f57b 100644
--- a/include/asm-x86/therm_throt.h
+++ b/include/asm-x86/therm_throt.h
@@ -1,9 +1,9 @@
-#ifndef __ASM_I386_THERM_THROT_H__
-#define __ASM_I386_THERM_THROT_H__ 1
+#ifndef ASM_X86__THERM_THROT_H
+#define ASM_X86__THERM_THROT_H
 
 #include <asm/atomic.h>
 
 extern atomic_t therm_throt_en;
 int therm_throt_process(int curr);
 
-#endif /* __ASM_I386_THERM_THROT_H__ */
+#endif /* ASM_X86__THERM_THROT_H */
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index da0a675..4db0066 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -4,8 +4,8 @@
  * - Incorporating suggestions made by Linus Torvalds and Dave Miller
  */
 
-#ifndef _ASM_X86_THREAD_INFO_H
-#define _ASM_X86_THREAD_INFO_H
+#ifndef ASM_X86__THREAD_INFO_H
+#define ASM_X86__THREAD_INFO_H
 
 #include <linux/compiler.h>
 #include <asm/page.h>
@@ -71,6 +71,7 @@
  * Warning: layout of LSW is hardcoded in entry.S
  */
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
@@ -93,6 +94,7 @@
 #define TIF_BTS_TRACE_TS	27      /* record scheduling event timestamps */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
@@ -133,7 +135,7 @@
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY)
+	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
@@ -258,4 +260,4 @@
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define arch_task_cache_init arch_task_cache_init
 #endif
-#endif /* _ASM_X86_THREAD_INFO_H */
+#endif /* ASM_X86__THREAD_INFO_H */
diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h
index a17fa47..3e724ee 100644
--- a/include/asm-x86/time.h
+++ b/include/asm-x86/time.h
@@ -1,5 +1,5 @@
-#ifndef _ASMX86_TIME_H
-#define _ASMX86_TIME_H
+#ifndef ASM_X86__TIME_H
+#define ASM_X86__TIME_H
 
 extern void hpet_time_init(void);
 
@@ -46,6 +46,8 @@
 
 #endif
 
+extern void time_init(void);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else /* !CONFIG_PARAVIRT */
@@ -58,4 +60,4 @@
 
 extern unsigned long __init calibrate_cpu(void);
 
-#endif
+#endif /* ASM_X86__TIME_H */
diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h
index fb2a4dd..d0babce 100644
--- a/include/asm-x86/timer.h
+++ b/include/asm-x86/timer.h
@@ -1,5 +1,5 @@
-#ifndef _ASMi386_TIMER_H
-#define _ASMi386_TIMER_H
+#ifndef ASM_X86__TIMER_H
+#define ASM_X86__TIMER_H
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/percpu.h>
@@ -9,9 +9,12 @@
 unsigned long long native_sched_clock(void);
 unsigned long native_calibrate_tsc(void);
 
+#ifdef CONFIG_X86_32
 extern int timer_ack;
-extern int no_timer_check;
 extern int recalibrate_cpu_khz(void);
+#endif /* CONFIG_X86_32 */
+
+extern int no_timer_check;
 
 #ifndef CONFIG_PARAVIRT
 #define calibrate_tsc() native_calibrate_tsc()
@@ -60,4 +63,4 @@
 	return ns;
 }
 
-#endif
+#endif /* ASM_X86__TIMER_H */
diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h
index 43e5a78..d1ce241 100644
--- a/include/asm-x86/timex.h
+++ b/include/asm-x86/timex.h
@@ -1,6 +1,6 @@
 /* x86 architecture timex specifications */
-#ifndef _ASM_X86_TIMEX_H
-#define _ASM_X86_TIMEX_H
+#ifndef ASM_X86__TIMEX_H
+#define ASM_X86__TIMEX_H
 
 #include <asm/processor.h>
 #include <asm/tsc.h>
@@ -16,4 +16,4 @@
 
 #define ARCH_HAS_READ_CURRENT_TIMER
 
-#endif
+#endif /* ASM_X86__TIMEX_H */
diff --git a/include/asm-x86/tlb.h b/include/asm-x86/tlb.h
index e4e9e2d..db36e9e 100644
--- a/include/asm-x86/tlb.h
+++ b/include/asm-x86/tlb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TLB_H
-#define _ASM_X86_TLB_H
+#ifndef ASM_X86__TLB_H
+#define ASM_X86__TLB_H
 
 #define tlb_start_vma(tlb, vma) do { } while (0)
 #define tlb_end_vma(tlb, vma) do { } while (0)
@@ -8,4 +8,4 @@
 
 #include <asm-generic/tlb.h>
 
-#endif
+#endif /* ASM_X86__TLB_H */
diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h
index 35c76ce..ef68b76 100644
--- a/include/asm-x86/tlbflush.h
+++ b/include/asm-x86/tlbflush.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TLBFLUSH_H
-#define _ASM_X86_TLBFLUSH_H
+#ifndef ASM_X86__TLBFLUSH_H
+#define ASM_X86__TLBFLUSH_H
 
 #include <linux/mm.h>
 #include <linux/sched.h>
@@ -165,4 +165,4 @@
 	flush_tlb_all();
 }
 
-#endif /* _ASM_X86_TLBFLUSH_H */
+#endif /* ASM_X86__TLBFLUSH_H */
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 90ac771..7eca9bc 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -22,8 +22,8 @@
  *
  * Send feedback to <colpatch@us.ibm.com>
  */
-#ifndef _ASM_X86_TOPOLOGY_H
-#define _ASM_X86_TOPOLOGY_H
+#ifndef ASM_X86__TOPOLOGY_H
+#define ASM_X86__TOPOLOGY_H
 
 #ifdef CONFIG_X86_32
 # ifdef CONFIG_X86_HT
@@ -255,4 +255,4 @@
 }
 #endif
 
-#endif /* _ASM_X86_TOPOLOGY_H */
+#endif /* ASM_X86__TOPOLOGY_H */
diff --git a/include/asm-x86/trampoline.h b/include/asm-x86/trampoline.h
index b156b08..0406bbd 100644
--- a/include/asm-x86/trampoline.h
+++ b/include/asm-x86/trampoline.h
@@ -1,5 +1,5 @@
-#ifndef __TRAMPOLINE_HEADER
-#define __TRAMPOLINE_HEADER
+#ifndef ASM_X86__TRAMPOLINE_H
+#define ASM_X86__TRAMPOLINE_H
 
 #ifndef __ASSEMBLY__
 
@@ -18,4 +18,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* __TRAMPOLINE_HEADER */
+#endif /* ASM_X86__TRAMPOLINE_H */
diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h
index a4b65a7..2ccebc6 100644
--- a/include/asm-x86/traps.h
+++ b/include/asm-x86/traps.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TRAPS_H
-#define _ASM_X86_TRAPS_H
+#ifndef ASM_X86__TRAPS_H
+#define ASM_X86__TRAPS_H
 
 /* Common in X86_32 and X86_64 */
 asmlinkage void divide_error(void);
@@ -51,6 +51,8 @@
 unsigned long patch_espfix_desc(unsigned long, unsigned long);
 asmlinkage void math_emulate(long);
 
+void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+
 #else /* CONFIG_X86_32 */
 
 asmlinkage void double_fault(void);
@@ -62,5 +64,7 @@
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *);
 asmlinkage void do_spurious_interrupt_bug(struct pt_regs *);
 
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+
 #endif /* CONFIG_X86_32 */
-#endif /* _ASM_X86_TRAPS_H */
+#endif /* ASM_X86__TRAPS_H */
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index cb6f6ee..ad0f5c4 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -1,8 +1,8 @@
 /*
  * x86 TSC related functions
  */
-#ifndef _ASM_X86_TSC_H
-#define _ASM_X86_TSC_H
+#ifndef ASM_X86__TSC_H
+#define ASM_X86__TSC_H
 
 #include <asm/processor.h>
 
@@ -59,4 +59,4 @@
 
 extern int notsc_setup(char *);
 
-#endif
+#endif /* ASM_X86__TSC_H */
diff --git a/include/asm-x86/types.h b/include/asm-x86/types.h
index 1ac80cd..e78b52e 100644
--- a/include/asm-x86/types.h
+++ b/include/asm-x86/types.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TYPES_H
-#define _ASM_X86_TYPES_H
+#ifndef ASM_X86__TYPES_H
+#define ASM_X86__TYPES_H
 
 #include <asm-generic/int-ll64.h>
 
@@ -33,4 +33,4 @@
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__TYPES_H */
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 5f702d1..48ebc0a 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_UACCES_H_
-#define _ASM_UACCES_H_
+#ifndef ASM_X86__UACCESS_H
+#define ASM_X86__UACCESS_H
 /*
  * User space memory access functions
  */
@@ -450,5 +450,5 @@
 # include "uaccess_64.h"
 #endif
 
-#endif
+#endif /* ASM_X86__UACCESS_H */
 
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 6fdef39..6b5b57d 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -1,5 +1,5 @@
-#ifndef __i386_UACCESS_H
-#define __i386_UACCESS_H
+#ifndef ASM_X86__UACCESS_32_H
+#define ASM_X86__UACCESS_32_H
 
 /*
  * User space memory access functions
@@ -215,4 +215,4 @@
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 
-#endif /* __i386_UACCESS_H */
+#endif /* ASM_X86__UACCESS_32_H */
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 45806d6..c96c1f5 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -1,5 +1,5 @@
-#ifndef __X86_64_UACCESS_H
-#define __X86_64_UACCESS_H
+#ifndef ASM_X86__UACCESS_64_H
+#define ASM_X86__UACCESS_64_H
 
 /*
  * User space memory access functions
@@ -199,4 +199,4 @@
 unsigned long
 copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
 
-#endif /* __X86_64_UACCESS_H */
+#endif /* ASM_X86__UACCESS_64_H */
diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h
index 50a79f7..9948dd3 100644
--- a/include/asm-x86/ucontext.h
+++ b/include/asm-x86/ucontext.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UCONTEXT_H
-#define _ASM_X86_UCONTEXT_H
+#ifndef ASM_X86__UCONTEXT_H
+#define ASM_X86__UCONTEXT_H
 
 struct ucontext {
 	unsigned long	  uc_flags;
@@ -9,4 +9,4 @@
 	sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
 
-#endif /* _ASM_X86_UCONTEXT_H */
+#endif /* ASM_X86__UCONTEXT_H */
diff --git a/include/asm-x86/unaligned.h b/include/asm-x86/unaligned.h
index a7bd416..59dcdec 100644
--- a/include/asm-x86/unaligned.h
+++ b/include/asm-x86/unaligned.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNALIGNED_H
-#define _ASM_X86_UNALIGNED_H
+#ifndef ASM_X86__UNALIGNED_H
+#define ASM_X86__UNALIGNED_H
 
 /*
  * The x86 can do unaligned accesses itself.
@@ -11,4 +11,4 @@
 #define get_unaligned __get_unaligned_le
 #define put_unaligned __put_unaligned_le
 
-#endif /* _ASM_X86_UNALIGNED_H */
+#endif /* ASM_X86__UNALIGNED_H */
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index d739467..017f4a8 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_I386_UNISTD_H_
-#define _ASM_I386_UNISTD_H_
+#ifndef ASM_X86__UNISTD_32_H
+#define ASM_X86__UNISTD_32_H
 
 /*
  * This file contains the system call numbers.
@@ -376,4 +376,4 @@
 #endif
 
 #endif /* __KERNEL__ */
-#endif /* _ASM_I386_UNISTD_H_ */
+#endif /* ASM_X86__UNISTD_32_H */
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index 3a341d7..ace83f1 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_UNISTD_H_
-#define _ASM_X86_64_UNISTD_H_
+#ifndef ASM_X86__UNISTD_64_H
+#define ASM_X86__UNISTD_64_H
 
 #ifndef __SYSCALL
 #define __SYSCALL(a, b)
@@ -690,4 +690,4 @@
 #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
 #endif	/* __KERNEL__ */
 
-#endif /* _ASM_X86_64_UNISTD_H_ */
+#endif /* ASM_X86__UNISTD_64_H */
diff --git a/include/asm-x86/unwind.h b/include/asm-x86/unwind.h
index 8b064bd..a215156 100644
--- a/include/asm-x86/unwind.h
+++ b/include/asm-x86/unwind.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNWIND_H
-#define _ASM_X86_UNWIND_H
+#ifndef ASM_X86__UNWIND_H
+#define ASM_X86__UNWIND_H
 
 #define UNW_PC(frame) ((void)(frame), 0UL)
 #define UNW_SP(frame) ((void)(frame), 0UL)
@@ -10,4 +10,4 @@
 	return 0;
 }
 
-#endif /* _ASM_X86_UNWIND_H */
+#endif /* ASM_X86__UNWIND_H */
diff --git a/include/asm-x86/user32.h b/include/asm-x86/user32.h
index a3d9100..aa66c18 100644
--- a/include/asm-x86/user32.h
+++ b/include/asm-x86/user32.h
@@ -1,5 +1,5 @@
-#ifndef USER32_H
-#define USER32_H 1
+#ifndef ASM_X86__USER32_H
+#define ASM_X86__USER32_H
 
 /* IA32 compatible user structures for ptrace.
  * These should be used for 32bit coredumps too. */
@@ -67,4 +67,4 @@
 };
 
 
-#endif
+#endif /* ASM_X86__USER32_H */
diff --git a/include/asm-x86/user_32.h b/include/asm-x86/user_32.h
index d6e51ed..e0fe2f5 100644
--- a/include/asm-x86/user_32.h
+++ b/include/asm-x86/user_32.h
@@ -1,5 +1,5 @@
-#ifndef _I386_USER_H
-#define _I386_USER_H
+#ifndef ASM_X86__USER_32_H
+#define ASM_X86__USER_32_H
 
 #include <asm/page.h>
 /* Core file format: The core file is written in such a way that gdb
@@ -128,4 +128,4 @@
 #define HOST_TEXT_START_ADDR (u.start_code)
 #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
 
-#endif /* _I386_USER_H */
+#endif /* ASM_X86__USER_32_H */
diff --git a/include/asm-x86/user_64.h b/include/asm-x86/user_64.h
index 6037b63..38b5799 100644
--- a/include/asm-x86/user_64.h
+++ b/include/asm-x86/user_64.h
@@ -1,5 +1,5 @@
-#ifndef _X86_64_USER_H
-#define _X86_64_USER_H
+#ifndef ASM_X86__USER_64_H
+#define ASM_X86__USER_64_H
 
 #include <asm/types.h>
 #include <asm/page.h>
@@ -134,4 +134,4 @@
 #define HOST_TEXT_START_ADDR (u.start_code)
 #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
 
-#endif /* _X86_64_USER_H */
+#endif /* ASM_X86__USER_64_H */
diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index aa73362..7cd6d7e 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BIOS_H
-#define _ASM_X86_BIOS_H
+#ifndef ASM_X86__UV__BIOS_H
+#define ASM_X86__UV__BIOS_H
 
 /*
  * BIOS layer definitions.
@@ -65,4 +65,4 @@
 		   unsigned long *drift_info);
 extern const char *x86_bios_strerror(long status);
 
-#endif /* _ASM_X86_BIOS_H */
+#endif /* ASM_X86__UV__BIOS_H */
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
index 610b6b3..77153fb 100644
--- a/include/asm-x86/uv/uv_bau.h
+++ b/include/asm-x86/uv/uv_bau.h
@@ -8,8 +8,8 @@
  * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_BAU__
-#define __ASM_X86_UV_BAU__
+#ifndef ASM_X86__UV__UV_BAU_H
+#define ASM_X86__UV__UV_BAU_H
 
 #include <linux/bitmap.h>
 #define BITSPERBYTE 8
@@ -329,4 +329,4 @@
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
-#endif /* __ASM_X86_UV_BAU__ */
+#endif /* ASM_X86__UV__UV_BAU_H */
diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h
index a4ef26e..bdb5b01 100644
--- a/include/asm-x86/uv/uv_hub.h
+++ b/include/asm-x86/uv/uv_hub.h
@@ -8,8 +8,8 @@
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_HUB_H__
-#define __ASM_X86_UV_HUB_H__
+#ifndef ASM_X86__UV__UV_HUB_H
+#define ASM_X86__UV__UV_HUB_H
 
 #include <linux/numa.h>
 #include <linux/percpu.h>
@@ -350,5 +350,5 @@
 	return uv_possible_blades;
 }
 
-#endif /* __ASM_X86_UV_HUB__ */
+#endif /* ASM_X86__UV__UV_HUB_H */
 
diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index 151fd7f..8b03d89 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h
@@ -8,8 +8,8 @@
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_MMRS__
-#define __ASM_X86_UV_MMRS__
+#ifndef ASM_X86__UV__UV_MMRS_H
+#define ASM_X86__UV__UV_MMRS_H
 
 #define UV_MMR_ENABLE		(1UL << 63)
 
@@ -1292,4 +1292,4 @@
 };
 
 
-#endif /* __ASM_X86_UV_MMRS__ */
+#endif /* ASM_X86__UV__UV_MMRS_H */
diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h
index 8e18fb8..4ab3209 100644
--- a/include/asm-x86/vdso.h
+++ b/include/asm-x86/vdso.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_VDSO_H
-#define _ASM_X86_VDSO_H	1
+#ifndef ASM_X86__VDSO_H
+#define ASM_X86__VDSO_H
 
 #ifdef CONFIG_X86_64
 extern const char VDSO64_PRELINK[];
@@ -44,4 +44,4 @@
 extern const char vdso32_syscall_start, vdso32_syscall_end;
 extern const char vdso32_sysenter_start, vdso32_sysenter_end;
 
-#endif	/* asm-x86/vdso.h */
+#endif /* ASM_X86__VDSO_H */
diff --git a/include/asm-x86/vga.h b/include/asm-x86/vga.h
index 0ccf804..b9e493d 100644
--- a/include/asm-x86/vga.h
+++ b/include/asm-x86/vga.h
@@ -4,8 +4,8 @@
  *	(c) 1998 Martin Mares <mj@ucw.cz>
  */
 
-#ifndef _LINUX_ASM_VGA_H_
-#define _LINUX_ASM_VGA_H_
+#ifndef ASM_X86__VGA_H
+#define ASM_X86__VGA_H
 
 /*
  *	On the PC, we can just recalculate addresses and then
@@ -17,4 +17,4 @@
 #define vga_readb(x) (*(x))
 #define vga_writeb(x, y) (*(y) = (x))
 
-#endif
+#endif /* ASM_X86__VGA_H */
diff --git a/include/asm-x86/vgtod.h b/include/asm-x86/vgtod.h
index 3301f09..38fd133 100644
--- a/include/asm-x86/vgtod.h
+++ b/include/asm-x86/vgtod.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_VGTOD_H
-#define _ASM_VGTOD_H 1
+#ifndef ASM_X86__VGTOD_H
+#define ASM_X86__VGTOD_H
 
 #include <asm/vsyscall.h>
 #include <linux/clocksource.h>
@@ -26,4 +26,4 @@
 __section_vsyscall_gtod_data;
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
-#endif
+#endif /* ASM_X86__VGTOD_H */
diff --git a/include/asm-x86/visws/cobalt.h b/include/asm-x86/visws/cobalt.h
index 9952588..9627a8f 100644
--- a/include/asm-x86/visws/cobalt.h
+++ b/include/asm-x86/visws/cobalt.h
@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_COBALT_H
-#define __I386_SGI_COBALT_H
+#ifndef ASM_X86__VISWS__COBALT_H
+#define ASM_X86__VISWS__COBALT_H
 
 #include <asm/fixmap.h>
 
@@ -122,4 +122,4 @@
 
 extern char visws_board_rev;
 
-#endif /* __I386_SGI_COBALT_H */
+#endif /* ASM_X86__VISWS__COBALT_H */
diff --git a/include/asm-x86/visws/lithium.h b/include/asm-x86/visws/lithium.h
index dfcd4f0..b36d3b3 100644
--- a/include/asm-x86/visws/lithium.h
+++ b/include/asm-x86/visws/lithium.h
@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_LITHIUM_H
-#define __I386_SGI_LITHIUM_H
+#ifndef ASM_X86__VISWS__LITHIUM_H
+#define ASM_X86__VISWS__LITHIUM_H
 
 #include <asm/fixmap.h>
 
@@ -49,5 +49,5 @@
 	return *((volatile unsigned short *)(LI_PCIB_VADDR+reg));
 }
 
-#endif
+#endif /* ASM_X86__VISWS__LITHIUM_H */
 
diff --git a/include/asm-x86/visws/piix4.h b/include/asm-x86/visws/piix4.h
index 83ea4f4..61c9380 100644
--- a/include/asm-x86/visws/piix4.h
+++ b/include/asm-x86/visws/piix4.h
@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_PIIX_H
-#define __I386_SGI_PIIX_H
+#ifndef ASM_X86__VISWS__PIIX4_H
+#define ASM_X86__VISWS__PIIX4_H
 
 /*
  * PIIX4 as used on SGI Visual Workstations
@@ -104,4 +104,4 @@
  */
 #define	PIIX_GPI_STPCLK		0x4	// STPCLK signal routed back in
 
-#endif
+#endif /* ASM_X86__VISWS__PIIX4_H */
diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h
index 5ce3513..998bd18 100644
--- a/include/asm-x86/vm86.h
+++ b/include/asm-x86/vm86.h
@@ -1,5 +1,5 @@
-#ifndef _LINUX_VM86_H
-#define _LINUX_VM86_H
+#ifndef ASM_X86__VM86_H
+#define ASM_X86__VM86_H
 
 /*
  * I'm guessing at the VIF/VIP flag usage, but hope that this is how
@@ -205,4 +205,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__VM86_H */
diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h
index c3118c3..b2d39e6 100644
--- a/include/asm-x86/vmi_time.h
+++ b/include/asm-x86/vmi_time.h
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef __VMI_TIME_H
-#define __VMI_TIME_H
+#ifndef ASM_X86__VMI_TIME_H
+#define ASM_X86__VMI_TIME_H
 
 /*
  * Raw VMI call indices for timer functions
@@ -95,4 +95,4 @@
 
 #define CONFIG_VMI_ALARM_HZ	100
 
-#endif
+#endif /* ASM_X86__VMI_TIME_H */
diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index 6b66ff9..dcd4682 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_VSYSCALL_H_
-#define _ASM_X86_64_VSYSCALL_H_
+#ifndef ASM_X86__VSYSCALL_H
+#define ASM_X86__VSYSCALL_H
 
 enum vsyscall_num {
 	__NR_vgettimeofday,
@@ -41,4 +41,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* _ASM_X86_64_VSYSCALL_H_ */
+#endif /* ASM_X86__VSYSCALL_H */
diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
index 8ded747..8151f5b 100644
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h
@@ -1,5 +1,5 @@
-#ifndef __XEN_EVENTS_H
-#define __XEN_EVENTS_H
+#ifndef ASM_X86__XEN__EVENTS_H
+#define ASM_X86__XEN__EVENTS_H
 
 enum ipi_vector {
 	XEN_RESCHEDULE_VECTOR,
@@ -21,4 +21,4 @@
 	do_IRQ(regs);
 }
 
-#endif /* __XEN_EVENTS_H */
+#endif /* ASM_X86__XEN__EVENTS_H */
diff --git a/include/asm-x86/xen/grant_table.h b/include/asm-x86/xen/grant_table.h
index 2444d45..c4baab4 100644
--- a/include/asm-x86/xen/grant_table.h
+++ b/include/asm-x86/xen/grant_table.h
@@ -1,7 +1,7 @@
-#ifndef __XEN_GRANT_TABLE_H
-#define __XEN_GRANT_TABLE_H
+#ifndef ASM_X86__XEN__GRANT_TABLE_H
+#define ASM_X86__XEN__GRANT_TABLE_H
 
 #define xen_alloc_vm_area(size)	alloc_vm_area(size)
 #define xen_free_vm_area(area)	free_vm_area(area)
 
-#endif /* __XEN_GRANT_TABLE_H */
+#endif /* ASM_X86__XEN__GRANT_TABLE_H */
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index 91cb7fd..44f4259 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -30,8 +30,8 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef __HYPERCALL_H__
-#define __HYPERCALL_H__
+#ifndef ASM_X86__XEN__HYPERCALL_H
+#define ASM_X86__XEN__HYPERCALL_H
 
 #include <linux/errno.h>
 #include <linux/string.h>
@@ -524,4 +524,4 @@
 	mcl->args[1] = esp;
 }
 
-#endif /* __HYPERCALL_H__ */
+#endif /* ASM_X86__XEN__HYPERCALL_H */
diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h
index 04ee061..0ef3a88 100644
--- a/include/asm-x86/xen/hypervisor.h
+++ b/include/asm-x86/xen/hypervisor.h
@@ -30,8 +30,8 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef __HYPERVISOR_H__
-#define __HYPERVISOR_H__
+#ifndef ASM_X86__XEN__HYPERVISOR_H
+#define ASM_X86__XEN__HYPERVISOR_H
 
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -69,4 +69,4 @@
 
 #define is_running_on_xen()	(xen_start_info ? 1 : 0)
 
-#endif /* __HYPERVISOR_H__ */
+#endif /* ASM_X86__XEN__HYPERVISOR_H */
diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h
index 9d810f2..d077bba 100644
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h
@@ -6,8 +6,8 @@
  * Copyright (c) 2004, K A Fraser
  */
 
-#ifndef __ASM_X86_XEN_INTERFACE_H
-#define __ASM_X86_XEN_INTERFACE_H
+#ifndef ASM_X86__XEN__INTERFACE_H
+#define ASM_X86__XEN__INTERFACE_H
 
 #ifdef __XEN__
 #define __DEFINE_GUEST_HANDLE(name, type) \
@@ -172,4 +172,4 @@
 #define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
 #endif
 
-#endif	/* __ASM_X86_XEN_INTERFACE_H */
+#endif /* ASM_X86__XEN__INTERFACE_H */
diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h
index d8ac41d..08167e1 100644
--- a/include/asm-x86/xen/interface_32.h
+++ b/include/asm-x86/xen/interface_32.h
@@ -6,8 +6,8 @@
  * Copyright (c) 2004, K A Fraser
  */
 
-#ifndef __ASM_X86_XEN_INTERFACE_32_H
-#define __ASM_X86_XEN_INTERFACE_32_H
+#ifndef ASM_X86__XEN__INTERFACE_32_H
+#define ASM_X86__XEN__INTERFACE_32_H
 
 
 /*
@@ -94,4 +94,4 @@
 #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
 #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
 
-#endif	/* __ASM_X86_XEN_INTERFACE_32_H */
+#endif /* ASM_X86__XEN__INTERFACE_32_H */
diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h
index 842266c..046c0f1 100644
--- a/include/asm-x86/xen/interface_64.h
+++ b/include/asm-x86/xen/interface_64.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_XEN_INTERFACE_64_H
-#define __ASM_X86_XEN_INTERFACE_64_H
+#ifndef ASM_X86__XEN__INTERFACE_64_H
+#define ASM_X86__XEN__INTERFACE_64_H
 
 /*
  * 64-bit segment selectors
@@ -156,4 +156,4 @@
 #endif /* !__ASSEMBLY__ */
 
 
-#endif	/* __ASM_X86_XEN_INTERFACE_64_H */
+#endif /* ASM_X86__XEN__INTERFACE_64_H */
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index 7b3835d..c50185d 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -1,5 +1,5 @@
-#ifndef __XEN_PAGE_H
-#define __XEN_PAGE_H
+#ifndef ASM_X86__XEN__PAGE_H
+#define ASM_X86__XEN__PAGE_H
 
 #include <linux/pfn.h>
 
@@ -162,4 +162,4 @@
 void make_lowmem_page_readonly(void *vaddr);
 void make_lowmem_page_readwrite(void *vaddr);
 
-#endif /* __XEN_PAGE_H */
+#endif /* ASM_X86__XEN__PAGE_H */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index b68ec09..31474e8 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -180,6 +180,7 @@
 unifdef-y += auto_fs.h
 unifdef-y += auxvec.h
 unifdef-y += binfmts.h
+unifdef-y += blktrace_api.h
 unifdef-y += capability.h
 unifdef-y += capi.h
 unifdef-y += cciss_ioctl.h
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 8a12d71..be00973d 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -88,6 +88,7 @@
 	ATA_ID_DLF		= 128,
 	ATA_ID_CSFO		= 129,
 	ATA_ID_CFA_POWER	= 160,
+	ATA_ID_ROT_SPEED	= 217,
 	ATA_ID_PIO4		= (1 << 1),
 
 	ATA_ID_SERNO_LEN	= 20,
@@ -667,6 +668,15 @@
 	return 0;
 }
 
+static inline int ata_id_has_unload(const u16 *id)
+{
+	if (ata_id_major_version(id) >= 7 &&
+	    (id[ATA_ID_CFSSE] & 0xC000) == 0x4000 &&
+	    id[ATA_ID_CFSSE] & (1 << 13))
+		return 1;
+	return 0;
+}
+
 static inline int ata_id_current_chs_valid(const u16 *id)
 {
 	/* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command
@@ -691,6 +701,11 @@
 	return 0;
 }
 
+static inline int ata_id_is_ssd(const u16 *id)
+{
+	return id[ATA_ID_ROT_SPEED] == 0x01;
+}
+
 static inline int ata_drive_40wire(const u16 *dev_id)
 {
 	if (ata_id_is_sata(dev_id))
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 0933a14..ff5b4cf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -26,21 +26,8 @@
 
 #ifdef CONFIG_BLOCK
 
-/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
 #include <asm/io.h>
 
-#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY)
-#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1))
-#define BIOVEC_VIRT_OVERSIZE(x)	((x) > BIO_VMERGE_MAX_SIZE)
-#else
-#define BIOVEC_VIRT_START_SIZE(x)	0
-#define BIOVEC_VIRT_OVERSIZE(x)		0
-#endif
-
-#ifndef BIO_VMERGE_BOUNDARY
-#define BIO_VMERGE_BOUNDARY	0
-#endif
-
 #define BIO_DEBUG
 
 #ifdef BIO_DEBUG
@@ -88,25 +75,14 @@
 	/* Number of segments in this BIO after
 	 * physical address coalescing is performed.
 	 */
-	unsigned short		bi_phys_segments;
-
-	/* Number of segments after physical and DMA remapping
-	 * hardware coalescing is performed.
-	 */
-	unsigned short		bi_hw_segments;
+	unsigned int		bi_phys_segments;
 
 	unsigned int		bi_size;	/* residual I/O count */
 
-	/*
-	 * To keep track of the max hw size, we account for the
-	 * sizes of the first and last virtually mergeable segments
-	 * in this bio
-	 */
-	unsigned int		bi_hw_front_size;
-	unsigned int		bi_hw_back_size;
-
 	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
 
+	unsigned int		bi_comp_cpu;	/* completion CPU */
+
 	struct bio_vec		*bi_io_vec;	/* the actual vec list */
 
 	bio_end_io_t		*bi_end_io;
@@ -126,11 +102,14 @@
 #define BIO_UPTODATE	0	/* ok after I/O completion */
 #define BIO_RW_BLOCK	1	/* RW_AHEAD set, and read/write would block */
 #define BIO_EOF		2	/* out-out-bounds error */
-#define BIO_SEG_VALID	3	/* nr_hw_seg valid */
+#define BIO_SEG_VALID	3	/* bi_phys_segments valid */
 #define BIO_CLONED	4	/* doesn't own data */
 #define BIO_BOUNCED	5	/* bio is a bounce bio */
 #define BIO_USER_MAPPED 6	/* contains user pages */
 #define BIO_EOPNOTSUPP	7	/* not supported */
+#define BIO_CPU_AFFINE	8	/* complete bio on same CPU as submitted */
+#define BIO_NULL_MAPPED 9	/* contains invalid user pages */
+#define BIO_FS_INTEGRITY 10	/* fs owns integrity data, not block layer */
 #define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
 /*
@@ -144,18 +123,31 @@
 /*
  * bio bi_rw flags
  *
- * bit 0 -- read (not set) or write (set)
+ * bit 0 -- data direction
+ *	If not set, bio is a read from device. If set, it's a write to device.
  * bit 1 -- rw-ahead when set
  * bit 2 -- barrier
+ *	Insert a serialization point in the IO queue, forcing previously
+ *	submitted IO to be completed before this oen is issued.
  * bit 3 -- fail fast, don't want low level driver retries
  * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
+ *	Note that this does NOT indicate that the IO itself is sync, just
+ *	that the block layer will not postpone issue of this IO by plugging.
+ * bit 5 -- metadata request
+ *	Used for tracing to differentiate metadata and data IO. May also
+ *	get some preferential treatment in the IO scheduler
+ * bit 6 -- discard sectors
+ *	Informs the lower level device that this range of sectors is no longer
+ *	used by the file system and may thus be freed by the device. Used
+ *	for flash based storage.
  */
-#define BIO_RW		0
-#define BIO_RW_AHEAD	1
+#define BIO_RW		0	/* Must match RW in req flags (blkdev.h) */
+#define BIO_RW_AHEAD	1	/* Must match FAILFAST in req flags */
 #define BIO_RW_BARRIER	2
 #define BIO_RW_FAILFAST	3
 #define BIO_RW_SYNC	4
 #define BIO_RW_META	5
+#define BIO_RW_DISCARD	6
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -185,14 +177,15 @@
 #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
 #define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
-#define bio_empty_barrier(bio)	(bio_barrier(bio) && !(bio)->bi_size)
+#define bio_discard(bio)	((bio)->bi_rw & (1 << BIO_RW_DISCARD))
+#define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
 
 static inline unsigned int bio_cur_sectors(struct bio *bio)
 {
 	if (bio->bi_vcnt)
 		return bio_iovec(bio)->bv_len >> 9;
-
-	return 0;
+	else /* dataless requests such as discard */
+		return bio->bi_size >> 9;
 }
 
 static inline void *bio_data(struct bio *bio)
@@ -236,8 +229,6 @@
 	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
 #endif
 
-#define BIOVEC_VIRT_MERGEABLE(vec1, vec2)	\
-	((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
 	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
@@ -319,15 +310,14 @@
 	atomic_t			cnt;
 	int				error;
 };
-extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool,
-				  int first_sectors);
-extern mempool_t *bio_split_pool;
+extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
 extern void bio_pair_release(struct bio_pair *dbio);
 
 extern struct bio_set *bioset_create(int, int);
 extern void bioset_free(struct bio_set *);
 
 extern struct bio *bio_alloc(gfp_t, int);
+extern struct bio *bio_kmalloc(gfp_t, int);
 extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
 extern void bio_put(struct bio *);
 extern void bio_free(struct bio *, struct bio_set *);
@@ -335,7 +325,6 @@
 extern void bio_endio(struct bio *, int);
 struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
-extern int bio_hw_segments(struct request_queue *, struct bio *);
 
 extern void __bio_clone(struct bio *, struct bio *);
 extern struct bio *bio_clone(struct bio *, gfp_t);
@@ -346,12 +335,14 @@
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
 extern int bio_get_nr_vecs(struct block_device *);
+extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int);
 extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
-				unsigned long, unsigned int, int);
+				unsigned long, unsigned int, int, gfp_t);
 struct sg_iovec;
+struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
 				    struct block_device *,
-				    struct sg_iovec *, int, int);
+				    struct sg_iovec *, int, int, gfp_t);
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
 				gfp_t);
@@ -359,15 +350,25 @@
 				 gfp_t, int);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
-extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
-extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *,
-				     int, int);
+extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
+				 unsigned long, unsigned int, int, gfp_t);
+extern struct bio *bio_copy_user_iov(struct request_queue *,
+				     struct rq_map_data *, struct sg_iovec *,
+				     int, int, gfp_t);
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);
 extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 /*
+ * Allow queuer to specify a completion CPU for this bio
+ */
+static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
+{
+	bio->bi_comp_cpu = cpu;
+}
+
+/*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
  * These memory pools in turn all allocate from the bio_slab
@@ -445,6 +446,14 @@
 	__bio_kmap_irq((bio), (bio)->bi_idx, (flags))
 #define bio_kunmap_irq(buf,flags)	__bio_kunmap_irq(buf, flags)
 
+/*
+ * Check whether this bio carries any data or not. A NULL bio is allowed.
+ */
+static inline int bio_has_data(struct bio *bio)
+{
+	return bio && bio->bi_io_vec != NULL;
+}
+
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
 #define bip_vec_idx(bip, idx)	(&(bip->bip_vec[(idx)]))
@@ -458,14 +467,7 @@
 #define bip_for_each_vec(bvl, bip, i)					\
 	__bip_for_each_vec(bvl, bip, i, (bip)->bip_idx)
 
-static inline int bio_integrity(struct bio *bio)
-{
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-	return bio->bi_integrity != NULL;
-#else
-	return 0;
-#endif
-}
+#define bio_integrity(bio) (bio->bi_integrity != NULL)
 
 extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 53ea933..a92d9e4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -16,7 +16,9 @@
 #include <linux/bio.h>
 #include <linux/module.h>
 #include <linux/stringify.h>
+#include <linux/gfp.h>
 #include <linux/bsg.h>
+#include <linux/smp.h>
 
 #include <asm/scatterlist.h>
 
@@ -54,7 +56,6 @@
 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
 	REQ_TYPE_PM_RESUME,		/* resume request */
 	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
-	REQ_TYPE_FLUSH,			/* flush request */
 	REQ_TYPE_SPECIAL,		/* driver defined type */
 	REQ_TYPE_LINUX_BLOCK,		/* generic block layer message */
 	/*
@@ -76,19 +77,18 @@
  *
  */
 enum {
-	/*
-	 * just examples for now
-	 */
 	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
-	REQ_LB_OP_FLUSH = 0x41,		/* flush device */
+	REQ_LB_OP_FLUSH = 0x41,		/* flush request */
+	REQ_LB_OP_DISCARD = 0x42,	/* discard sectors */
 };
 
 /*
- * request type modified bits. first three bits match BIO_RW* bits, important
+ * request type modified bits. first two bits match BIO_RW* bits, important
  */
 enum rq_flag_bits {
 	__REQ_RW,		/* not set, read. set, write */
 	__REQ_FAILFAST,		/* no low level driver retries */
+	__REQ_DISCARD,		/* request to discard sectors */
 	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
 	__REQ_HARDBARRIER,	/* may not be passed by drive either */
@@ -111,6 +111,7 @@
 };
 
 #define REQ_RW		(1 << __REQ_RW)
+#define REQ_DISCARD	(1 << __REQ_DISCARD)
 #define REQ_FAILFAST	(1 << __REQ_FAILFAST)
 #define REQ_SORTED	(1 << __REQ_SORTED)
 #define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
@@ -140,12 +141,14 @@
  */
 struct request {
 	struct list_head queuelist;
-	struct list_head donelist;
+	struct call_single_data csd;
+	int cpu;
 
 	struct request_queue *q;
 
 	unsigned int cmd_flags;
 	enum rq_cmd_type_bits cmd_type;
+	unsigned long atomic_flags;
 
 	/* Maintain bio traversal state for part by part I/O submission.
 	 * hard_* are block layer internals, no driver should touch them!
@@ -190,13 +193,6 @@
 	 */
 	unsigned short nr_phys_segments;
 
-	/* Number of scatter-gather addr+len pairs after
-	 * physical and DMA remapping hardware coalescing is performed.
-	 * This is the number of scatter-gather entries the driver
-	 * will actually have to deal with after DMA mapping is done.
-	 */
-	unsigned short nr_hw_segments;
-
 	unsigned short ioprio;
 
 	void *special;
@@ -220,6 +216,8 @@
 	void *data;
 	void *sense;
 
+	unsigned long deadline;
+	struct list_head timeout_list;
 	unsigned int timeout;
 	int retries;
 
@@ -233,6 +231,11 @@
 	struct request *next_rq;
 };
 
+static inline unsigned short req_get_ioprio(struct request *req)
+{
+	return req->ioprio;
+}
+
 /*
  * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
  * requests. Some step values could eventually be made generic.
@@ -252,6 +255,7 @@
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
+typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -265,6 +269,15 @@
 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
+typedef int (lld_busy_fn) (struct request_queue *q);
+
+enum blk_eh_timer_return {
+	BLK_EH_NOT_HANDLED,
+	BLK_EH_HANDLED,
+	BLK_EH_RESET_TIMER,
+};
+
+typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
 
 enum blk_queue_state {
 	Queue_down,
@@ -307,10 +320,13 @@
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
+	prepare_discard_fn	*prepare_discard_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
+	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
+	lld_busy_fn		*lld_busy_fn;
 
 	/*
 	 * Dispatch queue sorting
@@ -385,6 +401,10 @@
 	unsigned int		nr_sorted;
 	unsigned int		in_flight;
 
+	unsigned int		rq_timeout;
+	struct timer_list	timeout;
+	struct list_head	timeout_list;
+
 	/*
 	 * sg stuff
 	 */
@@ -421,6 +441,10 @@
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES    10	/* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP   11	/* force complete on same CPU */
+#define QUEUE_FLAG_FAIL_IO     12	/* fake timeout */
+#define QUEUE_FLAG_STACKABLE   13	/* supports request stacking */
+#define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
 
 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -526,7 +550,10 @@
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
+#define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
+#define blk_queue_stackable(q)	\
+	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
 
 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -536,16 +563,18 @@
 #define blk_noretry_request(rq)	((rq)->cmd_flags & REQ_FAILFAST)
 #define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)
 
-#define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
+#define blk_account_rq(rq)	(blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) 
 
 #define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
 #define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
 #define blk_pm_request(rq)	\
 	(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
 
+#define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
 #define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
 #define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
+#define blk_discard_rq(rq)	((rq)->cmd_flags & REQ_DISCARD)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
 #define blk_empty_barrier(rq)	(blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
 /* rq->queuelist of dequeued request must be list_empty() */
@@ -592,7 +621,8 @@
 #define RQ_NOMERGE_FLAGS	\
 	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)	\
-	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
+	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
+	 (blk_discard_rq(rq) || blk_fs_request((rq))))
 
 /*
  * q->prep_rq_fn return values
@@ -637,6 +667,12 @@
 }
 #endif /* CONFIG_MMU */
 
+struct rq_map_data {
+	struct page **pages;
+	int page_order;
+	int nr_entries;
+};
+
 struct req_iterator {
 	int i;
 	struct bio *bio;
@@ -664,6 +700,10 @@
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
+extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
+extern int blk_lld_busy(struct request_queue *q);
+extern int blk_insert_cloned_request(struct request_queue *q,
+				     struct request *rq);
 extern void blk_plug_device(struct request_queue *);
 extern void blk_plug_device_unlocked(struct request_queue *);
 extern int blk_remove_plug(struct request_queue *);
@@ -705,11 +745,14 @@
 extern void __blk_run_queue(struct request_queue *);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_start_queueing(struct request_queue *);
-extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long);
+extern int blk_rq_map_user(struct request_queue *, struct request *,
+			   struct rq_map_data *, void __user *, unsigned long,
+			   gfp_t);
 extern int blk_rq_unmap_user(struct bio *);
 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
 extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
-			       struct sg_iovec *, int, unsigned int);
+			       struct rq_map_data *, struct sg_iovec *, int,
+			       unsigned int, gfp_t);
 extern int blk_execute_rq(struct request_queue *, struct gendisk *,
 			  struct request *, int);
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
@@ -750,12 +793,15 @@
 extern int blk_end_bidi_request(struct request *rq, int error,
 				unsigned int nr_bytes, unsigned int bidi_bytes);
 extern void end_request(struct request *, int);
-extern void end_queued_request(struct request *, int);
-extern void end_dequeued_request(struct request *, int);
 extern int blk_end_request_callback(struct request *rq, int error,
 				unsigned int nr_bytes,
 				int (drv_callback)(struct request *));
 extern void blk_complete_request(struct request *);
+extern void __blk_complete_request(struct request *);
+extern void blk_abort_request(struct request *);
+extern void blk_abort_queue(struct request_queue *);
+extern void blk_update_request(struct request *rq, int error,
+			       unsigned int nr_bytes);
 
 /*
  * blk_end_request() takes bytes instead of sectors as a complete size.
@@ -790,12 +836,16 @@
 extern int blk_queue_dma_drain(struct request_queue *q,
 			       dma_drain_needed_fn *dma_drain_needed,
 			       void *buf, unsigned int size);
+extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
+extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
+extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
+extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
 extern int blk_do_ordered(struct request_queue *, struct request **);
@@ -837,6 +887,16 @@
 }
 
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
+extern int blkdev_issue_discard(struct block_device *,
+				sector_t sector, sector_t nr_sects, gfp_t);
+
+static inline int sb_issue_discard(struct super_block *sb,
+				   sector_t block, sector_t nr_blocks)
+{
+	block <<= (sb->s_blocksize_bits - 9);
+	nr_blocks <<= (sb->s_blocksize_bits - 9);
+	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL);
+}
 
 /*
 * command filter functions
@@ -874,6 +934,13 @@
 	return q ? q->dma_alignment : 511;
 }
 
+static inline int blk_rq_aligned(struct request_queue *q, void *addr,
+				 unsigned int len)
+{
+	unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+	return !((unsigned long)addr & alignment) && !(len & alignment);
+}
+
 /* assumes size > 256 */
 static inline unsigned int blksize_bits(unsigned int size)
 {
@@ -900,7 +967,7 @@
 }
 
 struct work_struct;
-int kblockd_schedule_work(struct work_struct *work);
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
 void kblockd_flush_work(struct work_struct *work);
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
@@ -945,49 +1012,19 @@
 
 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
 extern void blk_integrity_unregister(struct gendisk *);
-extern int blk_integrity_compare(struct block_device *, struct block_device *);
+extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
 extern int blk_rq_count_integrity_sg(struct request *);
 
-static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
-{
-	if (bi)
-		return bi->tuple_size;
-
-	return 0;
-}
-
-static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
+static inline
+struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
 {
 	return bdev->bd_disk->integrity;
 }
 
-static inline unsigned int bdev_get_tag_size(struct block_device *bdev)
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
 {
-	struct blk_integrity *bi = bdev_get_integrity(bdev);
-
-	if (bi)
-		return bi->tag_size;
-
-	return 0;
-}
-
-static inline int bdev_integrity_enabled(struct block_device *bdev, int rw)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bdev);
-
-	if (bi == NULL)
-		return 0;
-
-	if (rw == READ && bi->verify_fn != NULL &&
-	    (bi->flags & INTEGRITY_FLAG_READ))
-		return 1;
-
-	if (rw == WRITE && bi->generate_fn != NULL &&
-	    (bi->flags & INTEGRITY_FLAG_WRITE))
-		return 1;
-
-	return 0;
+	return disk->integrity;
 }
 
 static inline int blk_integrity_rq(struct request *rq)
@@ -1004,7 +1041,7 @@
 #define blk_rq_count_integrity_sg(a)		(0)
 #define blk_rq_map_integrity_sg(a, b)		(0)
 #define bdev_get_integrity(a)			(0)
-#define bdev_get_tag_size(a)			(0)
+#define blk_get_integrity(a)			(0)
 #define blk_integrity_compare(a, b)		(0)
 #define blk_integrity_register(a, b)		(0)
 #define blk_integrity_unregister(a)		do { } while (0);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d084b8d..3a31eb5 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -1,8 +1,10 @@
 #ifndef BLKTRACE_H
 #define BLKTRACE_H
 
+#ifdef __KERNEL__
 #include <linux/blkdev.h>
 #include <linux/relay.h>
+#endif
 
 /*
  * Trace categories
@@ -21,6 +23,7 @@
 	BLK_TC_NOTIFY	= 1 << 10,	/* special message */
 	BLK_TC_AHEAD	= 1 << 11,	/* readahead */
 	BLK_TC_META	= 1 << 12,	/* metadata */
+	BLK_TC_DISCARD	= 1 << 13,	/* discard requests */
 
 	BLK_TC_END	= 1 << 15,	/* only 16-bits, reminder */
 };
@@ -47,6 +50,7 @@
 	__BLK_TA_SPLIT,			/* bio was split */
 	__BLK_TA_BOUNCE,		/* bio was bounced */
 	__BLK_TA_REMAP,			/* bio was remapped */
+	__BLK_TA_ABORT,			/* request aborted */
 };
 
 /*
@@ -77,6 +81,7 @@
 #define BLK_TA_SPLIT		(__BLK_TA_SPLIT)
 #define BLK_TA_BOUNCE		(__BLK_TA_BOUNCE)
 #define BLK_TA_REMAP		(__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_ABORT		(__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
 
 #define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
 #define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
@@ -89,17 +94,17 @@
  * The trace itself
  */
 struct blk_io_trace {
-	u32 magic;		/* MAGIC << 8 | version */
-	u32 sequence;		/* event number */
-	u64 time;		/* in microseconds */
-	u64 sector;		/* disk offset */
-	u32 bytes;		/* transfer length */
-	u32 action;		/* what happened */
-	u32 pid;		/* who did it */
-	u32 device;		/* device number */
-	u32 cpu;		/* on what cpu did it happen */
-	u16 error;		/* completion error */
-	u16 pdu_len;		/* length of data after this trace */
+	__u32 magic;		/* MAGIC << 8 | version */
+	__u32 sequence;		/* event number */
+	__u64 time;		/* in microseconds */
+	__u64 sector;		/* disk offset */
+	__u32 bytes;		/* transfer length */
+	__u32 action;		/* what happened */
+	__u32 pid;		/* who did it */
+	__u32 device;		/* device number */
+	__u32 cpu;		/* on what cpu did it happen */
+	__u16 error;		/* completion error */
+	__u16 pdu_len;		/* length of data after this trace */
 };
 
 /*
@@ -117,6 +122,23 @@
 	Blktrace_stopped,
 };
 
+#define BLKTRACE_BDEV_SIZE	32
+
+/*
+ * User setup structure passed with BLKTRACESTART
+ */
+struct blk_user_trace_setup {
+	char name[BLKTRACE_BDEV_SIZE];	/* output */
+	__u16 act_mask;			/* input */
+	__u32 buf_size;			/* input */
+	__u32 buf_nr;			/* input */
+	__u64 start_lba;
+	__u64 end_lba;
+	__u32 pid;
+};
+
+#ifdef __KERNEL__
+#if defined(CONFIG_BLK_DEV_IO_TRACE)
 struct blk_trace {
 	int trace_state;
 	struct rchan *rchan;
@@ -133,21 +155,6 @@
 	atomic_t dropped;
 };
 
-/*
- * User setup structure passed with BLKTRACESTART
- */
-struct blk_user_trace_setup {
-	char name[BDEVNAME_SIZE];	/* output */
-	u16 act_mask;			/* input */
-	u32 buf_size;			/* input */
-	u32 buf_nr;			/* input */
-	u64 start_lba;
-	u64 end_lba;
-	u32 pid;
-};
-
-#ifdef __KERNEL__
-#if defined(CONFIG_BLK_DEV_IO_TRACE)
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
 extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
@@ -195,6 +202,9 @@
 	if (likely(!bt))
 		return;
 
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
 	if (blk_pc_request(rq)) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
 		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
diff --git a/include/linux/device.h b/include/linux/device.h
index 4d8372d..246937c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -199,6 +199,11 @@
 	struct class_private *p;
 };
 
+struct class_dev_iter {
+	struct klist_iter		ki;
+	const struct device_type	*type;
+};
+
 extern struct kobject *sysfs_dev_block_kobj;
 extern struct kobject *sysfs_dev_char_kobj;
 extern int __must_check __class_register(struct class *class,
@@ -213,6 +218,13 @@
 	__class_register(class, &__key);	\
 })
 
+extern void class_dev_iter_init(struct class_dev_iter *iter,
+				struct class *class,
+				struct device *start,
+				const struct device_type *type);
+extern struct device *class_dev_iter_next(struct class_dev_iter *iter);
+extern void class_dev_iter_exit(struct class_dev_iter *iter);
+
 extern int class_for_each_device(struct class *class, struct device *start,
 				 void *data,
 				 int (*fn)(struct device *dev, void *data));
@@ -396,7 +408,7 @@
 	spinlock_t		devres_lock;
 	struct list_head	devres_head;
 
-	struct list_head	node;
+	struct klist_node	knode_class;
 	struct class		*class;
 	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
 	struct attribute_group	**groups;	/* optional groups */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 639624b..92f6f63 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -112,6 +112,7 @@
 extern int elv_register_queue(struct request_queue *q);
 extern void elv_unregister_queue(struct request_queue *q);
 extern int elv_may_queue(struct request_queue *, int);
+extern void elv_abort_queue(struct request_queue *);
 extern void elv_completed_request(struct request_queue *, struct request *);
 extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
 extern void elv_put_request(struct request_queue *, struct request *);
@@ -173,15 +174,15 @@
 #define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 /*
- * Hack to reuse the donelist list_head as the fifo time holder while
+ * Hack to reuse the csd.list list_head as the fifo time holder while
  * the request is in the io scheduler. Saves an unsigned long in rq.
  */
-#define rq_fifo_time(rq)	((unsigned long) (rq)->donelist.next)
-#define rq_set_fifo_time(rq,exp)	((rq)->donelist.next = (void *) (exp))
+#define rq_fifo_time(rq)	((unsigned long) (rq)->csd.list.next)
+#define rq_set_fifo_time(rq,exp)	((rq)->csd.list.next = (void *) (exp))
 #define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 #define rq_fifo_clear(rq)	do {		\
 	list_del_init(&(rq)->queuelist);	\
-	INIT_LIST_HEAD(&(rq)->donelist);	\
+	INIT_LIST_HEAD(&(rq)->csd.list);	\
 	} while (0)
 
 /*
diff --git a/include/linux/fd.h b/include/linux/fd.h
index b6bd41d..f5d194a 100644
--- a/include/linux/fd.h
+++ b/include/linux/fd.h
@@ -15,10 +15,16 @@
 			sect,		/* sectors per track */
 			head,		/* nr of heads */
 			track,		/* nr of tracks */
-			stretch;	/* !=0 means double track steps */
+			stretch;	/* bit 0 !=0 means double track steps */
+					/* bit 1 != 0 means swap sides */
+					/* bits 2..9 give the first sector */
+					/*  number (the LSB is flipped) */
 #define FD_STRETCH 1
 #define FD_SWAPSIDES 2
 #define FD_ZEROBASED 4
+#define FD_SECTBASEMASK 0x3FC
+#define FD_MKSECTBASE(s) (((s) ^ 1) << 2)
+#define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1)
 
 	unsigned char	gap,		/* gap1 size */
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 580b513..32477e8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -86,7 +86,9 @@
 #define READ_META	(READ | (1 << BIO_RW_META))
 #define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 #define SWRITE_SYNC	(SWRITE | (1 << BIO_RW_SYNC))
-#define WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
+#define WRITE_BARRIER	(WRITE | (1 << BIO_RW_BARRIER))
+#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
+#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
 
 #define SEL_IN		1
 #define SEL_OUT		2
@@ -222,6 +224,7 @@
 #define BLKTRACESTART _IO(0x12,116)
 #define BLKTRACESTOP _IO(0x12,117)
 #define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
@@ -1682,6 +1685,7 @@
 
 /* fs/block_dev.c */
 #define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
+#define BDEVT_SIZE	10	/* Largest string for MAJ:MIN for blkdev */
 
 #ifdef CONFIG_BLOCK
 #define BLKDEV_MAJOR_HASH_SIZE	255
@@ -1718,6 +1722,9 @@
  */
 #define bio_data_dir(bio)	((bio)->bi_rw & 1)
 
+extern void check_disk_size_change(struct gendisk *disk,
+				   struct block_device *bdev);
+extern int revalidate_disk(struct gendisk *);
 extern int check_disk_change(struct block_device *);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index be4f5e5..206cdf9 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -11,12 +11,15 @@
 
 #include <linux/types.h>
 #include <linux/kdev_t.h>
+#include <linux/rcupdate.h>
 
 #ifdef CONFIG_BLOCK
 
-#define kobj_to_dev(k) container_of(k, struct device, kobj)
-#define dev_to_disk(device) container_of(device, struct gendisk, dev)
-#define dev_to_part(device) container_of(device, struct hd_struct, dev)
+#define kobj_to_dev(k)		container_of((k), struct device, kobj)
+#define dev_to_disk(device)	container_of((device), struct gendisk, part0.__dev)
+#define dev_to_part(device)	container_of((device), struct hd_struct, __dev)
+#define disk_to_dev(disk)	(&(disk)->part0.__dev)
+#define part_to_dev(part)	(&((part)->__dev))
 
 extern struct device_type part_type;
 extern struct kobject *block_depr;
@@ -55,6 +58,9 @@
 	UNIXWARE_PARTITION = 0x63,	/* Same as GNU_HURD and SCO Unix */
 };
 
+#define DISK_MAX_PARTS			256
+#define DISK_NAME_LEN			32
+
 #include <linux/major.h>
 #include <linux/device.h>
 #include <linux/smp.h>
@@ -87,7 +93,7 @@
 struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
-	struct device dev;
+	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -100,6 +106,7 @@
 #else
 	struct disk_stats dkstats;
 #endif
+	struct rcu_head rcu_head;
 };
 
 #define GENHD_FL_REMOVABLE			1
@@ -108,100 +115,148 @@
 #define GENHD_FL_CD				8
 #define GENHD_FL_UP				16
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
-#define GENHD_FL_FAIL				64
+#define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
+
+#define BLK_SCSI_MAX_CMDS	(256)
+#define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+struct blk_scsi_cmd_filter {
+	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+	unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+	struct kobject kobj;
+};
+
+struct disk_part_tbl {
+	struct rcu_head rcu_head;
+	int len;
+	struct hd_struct *part[];
+};
 
 struct gendisk {
+	/* major, first_minor and minors are input parameters only,
+	 * don't use directly.  Use disk_devt() and disk_max_parts().
+	 */
 	int major;			/* major number of driver */
 	int first_minor;
 	int minors;                     /* maximum number of minors, =1 for
                                          * disks that can't be partitioned. */
-	char disk_name[32];		/* name of major driver */
-	struct hd_struct **part;	/* [indexed by minor] */
+
+	char disk_name[DISK_NAME_LEN];	/* name of major driver */
+
+	/* Array of pointers to partitions indexed by partno.
+	 * Protected with matching bdev lock but stat and other
+	 * non-critical accesses use RCU.  Always access through
+	 * helpers.
+	 */
+	struct disk_part_tbl *part_tbl;
+	struct hd_struct part0;
+
 	struct block_device_operations *fops;
 	struct request_queue *queue;
 	void *private_data;
-	sector_t capacity;
 
 	int flags;
 	struct device *driverfs_dev;  // FIXME: remove
-	struct device dev;
-	struct kobject *holder_dir;
 	struct kobject *slave_dir;
 
 	struct timer_rand_state *random;
-	int policy;
 
 	atomic_t sync_io;		/* RAID */
-	unsigned long stamp;
-	int in_flight;
-#ifdef	CONFIG_SMP
-	struct disk_stats *dkstats;
-#else
-	struct disk_stats dkstats;
-#endif
 	struct work_struct async_notify;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity *integrity;
 #endif
+	int node_id;
 };
 
-/* 
- * Macros to operate on percpu disk statistics:
- *
- * The __ variants should only be called in critical sections. The full
- * variants disable/enable preemption.
- */
-static inline struct hd_struct *get_part(struct gendisk *gendiskp,
-					 sector_t sector)
+static inline struct gendisk *part_to_disk(struct hd_struct *part)
 {
-	struct hd_struct *part;
-	int i;
-	for (i = 0; i < gendiskp->minors - 1; i++) {
-		part = gendiskp->part[i];
-		if (part && part->start_sect <= sector
-		    && sector < part->start_sect + part->nr_sects)
-			return part;
+	if (likely(part)) {
+		if (part->partno)
+			return dev_to_disk(part_to_dev(part)->parent);
+		else
+			return dev_to_disk(part_to_dev(part));
 	}
 	return NULL;
 }
 
+static inline int disk_max_parts(struct gendisk *disk)
+{
+	if (disk->flags & GENHD_FL_EXT_DEVT)
+		return DISK_MAX_PARTS;
+	return disk->minors;
+}
+
+static inline bool disk_partitionable(struct gendisk *disk)
+{
+	return disk_max_parts(disk) > 1;
+}
+
+static inline dev_t disk_devt(struct gendisk *disk)
+{
+	return disk_to_dev(disk)->devt;
+}
+
+static inline dev_t part_devt(struct hd_struct *part)
+{
+	return part_to_dev(part)->devt;
+}
+
+extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno);
+
+static inline void disk_put_part(struct hd_struct *part)
+{
+	if (likely(part))
+		put_device(part_to_dev(part));
+}
+
+/*
+ * Smarter partition iterator without context limits.
+ */
+#define DISK_PITER_REVERSE	(1 << 0) /* iterate in the reverse direction */
+#define DISK_PITER_INCL_EMPTY	(1 << 1) /* include 0-sized parts */
+#define DISK_PITER_INCL_PART0	(1 << 2) /* include partition 0 */
+
+struct disk_part_iter {
+	struct gendisk		*disk;
+	struct hd_struct	*part;
+	int			idx;
+	unsigned int		flags;
+};
+
+extern void disk_part_iter_init(struct disk_part_iter *piter,
+				 struct gendisk *disk, unsigned int flags);
+extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter);
+extern void disk_part_iter_exit(struct disk_part_iter *piter);
+
+extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
+					     sector_t sector);
+
+/*
+ * Macros to operate on percpu disk statistics:
+ *
+ * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters
+ * and should be called between disk_stat_lock() and
+ * disk_stat_unlock().
+ *
+ * part_stat_read() can be called at any time.
+ *
+ * part_stat_{add|set_all}() and {init|free}_part_stats are for
+ * internal use only.
+ */
 #ifdef	CONFIG_SMP
-#define __disk_stat_add(gendiskp, field, addnd) 	\
-	(per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd)
+#define part_stat_lock()	({ rcu_read_lock(); get_cpu(); })
+#define part_stat_unlock()	do { put_cpu(); rcu_read_unlock(); } while (0)
 
-#define disk_stat_read(gendiskp, field)					\
-({									\
-	typeof(gendiskp->dkstats->field) res = 0;			\
-	int i;								\
-	for_each_possible_cpu(i)					\
-		res += per_cpu_ptr(gendiskp->dkstats, i)->field;	\
-	res;								\
-})
-
-static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
-	int i;
-
-	for_each_possible_cpu(i)
-		memset(per_cpu_ptr(gendiskp->dkstats, i), value,
-				sizeof(struct disk_stats));
-}		
-
-#define __part_stat_add(part, field, addnd)				\
-	(per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
-
-#define __all_stat_add(gendiskp, part, field, addnd, sector)	\
-({								\
-	if (part)						\
-		__part_stat_add(part, field, addnd);		\
-	__disk_stat_add(gendiskp, field, addnd);		\
-})
+#define __part_stat_add(cpu, part, field, addnd)			\
+	(per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd))
 
 #define part_stat_read(part, field)					\
 ({									\
-	typeof(part->dkstats->field) res = 0;				\
+	typeof((part)->dkstats->field) res = 0;				\
 	int i;								\
 	for_each_possible_cpu(i)					\
-		res += per_cpu_ptr(part->dkstats, i)->field;		\
+		res += per_cpu_ptr((part)->dkstats, i)->field;		\
 	res;								\
 })
 
@@ -213,108 +268,6 @@
 		memset(per_cpu_ptr(part->dkstats, i), value,
 				sizeof(struct disk_stats));
 }
-				
-#else /* !CONFIG_SMP */
-#define __disk_stat_add(gendiskp, field, addnd) \
-				(gendiskp->dkstats.field += addnd)
-#define disk_stat_read(gendiskp, field)	(gendiskp->dkstats.field)
-
-static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
-{
-	memset(&gendiskp->dkstats, value, sizeof (struct disk_stats));
-}
-
-#define __part_stat_add(part, field, addnd) \
-	(part->dkstats.field += addnd)
-
-#define __all_stat_add(gendiskp, part, field, addnd, sector)	\
-({								\
-	if (part)						\
-		part->dkstats.field += addnd;			\
-	__disk_stat_add(gendiskp, field, addnd);		\
-})
-
-#define part_stat_read(part, field)	(part->dkstats.field)
-
-static inline void part_stat_set_all(struct hd_struct *part, int value)
-{
-	memset(&part->dkstats, value, sizeof(struct disk_stats));
-}
-
-#endif /* CONFIG_SMP */
-
-#define disk_stat_add(gendiskp, field, addnd)			\
-	do {							\
-		preempt_disable();				\
-		__disk_stat_add(gendiskp, field, addnd);	\
-		preempt_enable();				\
-	} while (0)
-
-#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1)
-#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1)
-
-#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1)
-#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1)
-
-#define __disk_stat_sub(gendiskp, field, subnd) \
-		__disk_stat_add(gendiskp, field, -subnd)
-#define disk_stat_sub(gendiskp, field, subnd) \
-		disk_stat_add(gendiskp, field, -subnd)
-
-#define part_stat_add(gendiskp, field, addnd)		\
-	do {						\
-		preempt_disable();			\
-		__part_stat_add(gendiskp, field, addnd);\
-		preempt_enable();			\
-	} while (0)
-
-#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1)
-#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1)
-
-#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1)
-#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1)
-
-#define __part_stat_sub(gendiskp, field, subnd) \
-		__part_stat_add(gendiskp, field, -subnd)
-#define part_stat_sub(gendiskp, field, subnd) \
-		part_stat_add(gendiskp, field, -subnd)
-
-#define all_stat_add(gendiskp, part, field, addnd, sector)	\
-	do {							\
-		preempt_disable();				\
-		__all_stat_add(gendiskp, part, field, addnd, sector);	\
-		preempt_enable();				\
-	} while (0)
-
-#define __all_stat_dec(gendiskp, field, sector) \
-		__all_stat_add(gendiskp, field, -1, sector)
-#define all_stat_dec(gendiskp, field, sector) \
-		all_stat_add(gendiskp, field, -1, sector)
-
-#define __all_stat_inc(gendiskp, part, field, sector) \
-		__all_stat_add(gendiskp, part, field, 1, sector)
-#define all_stat_inc(gendiskp, part, field, sector) \
-		all_stat_add(gendiskp, part, field, 1, sector)
-
-#define __all_stat_sub(gendiskp, part, field, subnd, sector) \
-		__all_stat_add(gendiskp, part, field, -subnd, sector)
-#define all_stat_sub(gendiskp, part, field, subnd, sector) \
-		all_stat_add(gendiskp, part, field, -subnd, sector)
-
-/* Inlines to alloc and free disk stats in struct gendisk */
-#ifdef  CONFIG_SMP
-static inline int init_disk_stats(struct gendisk *disk)
-{
-	disk->dkstats = alloc_percpu(struct disk_stats);
-	if (!disk->dkstats)
-		return 0;
-	return 1;
-}
-
-static inline void free_disk_stats(struct gendisk *disk)
-{
-	free_percpu(disk->dkstats);
-}
 
 static inline int init_part_stats(struct hd_struct *part)
 {
@@ -329,14 +282,18 @@
 	free_percpu(part->dkstats);
 }
 
-#else	/* CONFIG_SMP */
-static inline int init_disk_stats(struct gendisk *disk)
-{
-	return 1;
-}
+#else /* !CONFIG_SMP */
+#define part_stat_lock()	({ rcu_read_lock(); 0; })
+#define part_stat_unlock()	rcu_read_unlock()
 
-static inline void free_disk_stats(struct gendisk *disk)
+#define __part_stat_add(cpu, part, field, addnd)				\
+	((part)->dkstats.field += addnd)
+
+#define part_stat_read(part, field)	((part)->dkstats.field)
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
 {
+	memset(&part->dkstats, value, sizeof(struct disk_stats));
 }
 
 static inline int init_part_stats(struct hd_struct *part)
@@ -347,37 +304,71 @@
 static inline void free_part_stats(struct hd_struct *part)
 {
 }
-#endif	/* CONFIG_SMP */
+
+#endif /* CONFIG_SMP */
+
+#define part_stat_add(cpu, part, field, addnd)	do {			\
+	__part_stat_add((cpu), (part), field, addnd);			\
+	if ((part)->partno)						\
+		__part_stat_add((cpu), &part_to_disk((part))->part0,	\
+				field, addnd);				\
+} while (0)
+
+#define part_stat_dec(cpu, gendiskp, field)				\
+	part_stat_add(cpu, gendiskp, field, -1)
+#define part_stat_inc(cpu, gendiskp, field)				\
+	part_stat_add(cpu, gendiskp, field, 1)
+#define part_stat_sub(cpu, gendiskp, field, subnd)			\
+	part_stat_add(cpu, gendiskp, field, -subnd)
+
+static inline void part_inc_in_flight(struct hd_struct *part)
+{
+	part->in_flight++;
+	if (part->partno)
+		part_to_disk(part)->part0.in_flight++;
+}
+
+static inline void part_dec_in_flight(struct hd_struct *part)
+{
+	part->in_flight--;
+	if (part->partno)
+		part_to_disk(part)->part0.in_flight--;
+}
 
 /* drivers/block/ll_rw_blk.c */
-extern void disk_round_stats(struct gendisk *disk);
-extern void part_round_stats(struct hd_struct *part);
+extern void part_round_stats(int cpu, struct hd_struct *part);
 
 /* drivers/block/genhd.c */
 extern int get_blkdev_list(char *, int);
 extern void add_disk(struct gendisk *disk);
 extern void del_gendisk(struct gendisk *gp);
 extern void unlink_gendisk(struct gendisk *gp);
-extern struct gendisk *get_gendisk(dev_t dev, int *part);
+extern struct gendisk *get_gendisk(dev_t dev, int *partno);
+extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
 
 extern void set_device_ro(struct block_device *bdev, int flag);
 extern void set_disk_ro(struct gendisk *disk, int flag);
 
+static inline int get_disk_ro(struct gendisk *disk)
+{
+	return disk->part0.policy;
+}
+
 /* drivers/char/random.c */
 extern void add_disk_randomness(struct gendisk *disk);
 extern void rand_initialize_disk(struct gendisk *disk);
 
 static inline sector_t get_start_sect(struct block_device *bdev)
 {
-	return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect;
+	return bdev->bd_part->start_sect;
 }
 static inline sector_t get_capacity(struct gendisk *disk)
 {
-	return disk->capacity;
+	return disk->part0.nr_sects;
 }
 static inline void set_capacity(struct gendisk *disk, sector_t size)
 {
-	disk->capacity = size;
+	disk->part0.nr_sects = size;
 }
 
 #ifdef CONFIG_SOLARIS_X86_PARTITION
@@ -527,9 +518,12 @@
 #define ADDPART_FLAG_RAID	1
 #define ADDPART_FLAG_WHOLEDISK	2
 
-extern dev_t blk_lookup_devt(const char *name, int part);
-extern char *disk_name (struct gendisk *hd, int part, char *buf);
+extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
+extern void blk_free_devt(dev_t devt);
+extern dev_t blk_lookup_devt(const char *name, int partno);
+extern char *disk_name (struct gendisk *hd, int partno, char *buf);
 
+extern int disk_expand_part_tbl(struct gendisk *disk, int target);
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
 extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
 extern void delete_partition(struct gendisk *, int);
@@ -546,16 +540,23 @@
 			void *data);
 extern void blk_unregister_region(dev_t devt, unsigned long range);
 
-static inline struct block_device *bdget_disk(struct gendisk *disk, int index)
-{
-	return bdget(MKDEV(disk->major, disk->first_minor) + index);
-}
+extern ssize_t part_size_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
+extern ssize_t part_stat_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+extern ssize_t part_fail_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
+extern ssize_t part_fail_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count);
+#endif /* CONFIG_FAIL_MAKE_REQUEST */
 
 #else /* CONFIG_BLOCK */
 
 static inline void printk_all_partitions(void) { }
 
-static inline dev_t blk_lookup_devt(const char *name, int part)
+static inline dev_t blk_lookup_devt(const char *name, int partno)
 {
 	dev_t devt = MKDEV(0, 0);
 	return devt;
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index c3c19f9..14d0df0 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -118,7 +118,11 @@
 
 	char sb_lockproto[GFS2_LOCKNAME_LEN];
 	char sb_locktable[GFS2_LOCKNAME_LEN];
-	/* In gfs1, quota and license dinodes followed */
+
+	struct gfs2_inum __pad3; /* Was quota inode in gfs1 */
+	struct gfs2_inum __pad4; /* Was licence inode in gfs1 */
+#define GFS2_HAS_UUID 1
+	__u8 sb_uuid[16]; /* The UUID, maybe 0 for backwards compat */
 };
 
 /*
diff --git a/include/linux/klist.h b/include/linux/klist.h
index 06c338e..8ea98db 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -38,7 +38,7 @@
 		       void (*put)(struct klist_node *));
 
 struct klist_node {
-	struct klist		*n_klist;
+	void			*n_klist;	/* never access directly */
 	struct list_head	n_node;
 	struct kref		n_ref;
 	struct completion	n_removed;
@@ -57,7 +57,6 @@
 
 struct klist_iter {
 	struct klist		*i_klist;
-	struct list_head	*i_head;
 	struct klist_node	*i_cur;
 };
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 225bfc5..947cf84 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -146,6 +146,7 @@
 	ATA_DFLAG_SPUNDOWN	= (1 << 14), /* XXX: for spindown_compat */
 	ATA_DFLAG_SLEEPING	= (1 << 15), /* device is sleeping */
 	ATA_DFLAG_DUBIOUS_XFER	= (1 << 16), /* data transfer not verified */
+	ATA_DFLAG_NO_UNLOAD	= (1 << 17), /* device doesn't support unload */
 	ATA_DFLAG_INIT_MASK	= (1 << 24) - 1,
 
 	ATA_DFLAG_DETACH	= (1 << 24),
@@ -244,6 +245,7 @@
 	ATA_TMOUT_BOOT		= 30000,	/* heuristic */
 	ATA_TMOUT_BOOT_QUICK	=  7000,	/* heuristic */
 	ATA_TMOUT_INTERNAL_QUICK = 5000,
+	ATA_TMOUT_MAX_PARK	= 30000,
 
 	/* FIXME: GoVault needs 2s but we can't afford that without
 	 * parallel probing.  800ms is enough for iVDR disk
@@ -319,8 +321,11 @@
 	ATA_EH_RESET		= ATA_EH_SOFTRESET | ATA_EH_HARDRESET,
 	ATA_EH_ENABLE_LINK	= (1 << 3),
 	ATA_EH_LPM		= (1 << 4),  /* link power management action */
+	ATA_EH_PARK		= (1 << 5), /* unload heads and stop I/O */
 
-	ATA_EH_PERDEV_MASK	= ATA_EH_REVALIDATE,
+	ATA_EH_PERDEV_MASK	= ATA_EH_REVALIDATE | ATA_EH_PARK,
+	ATA_EH_ALL_ACTIONS	= ATA_EH_REVALIDATE | ATA_EH_RESET |
+				  ATA_EH_ENABLE_LINK | ATA_EH_LPM,
 
 	/* ata_eh_info->flags */
 	ATA_EHI_HOTPLUGGED	= (1 << 0),  /* could have been hotplugged */
@@ -452,6 +457,7 @@
 	MEDIUM_POWER,
 };
 extern struct device_attribute dev_attr_link_power_management_policy;
+extern struct device_attribute dev_attr_unload_heads;
 extern struct device_attribute dev_attr_em_message_type;
 extern struct device_attribute dev_attr_em_message;
 extern struct device_attribute dev_attr_sw_activity;
@@ -554,8 +560,8 @@
 struct ata_device {
 	struct ata_link		*link;
 	unsigned int		devno;		/* 0 or 1 */
-	unsigned long		flags;		/* ATA_DFLAG_xxx */
 	unsigned int		horkage;	/* List of broken features */
+	unsigned long		flags;		/* ATA_DFLAG_xxx */
 	struct scsi_device	*sdev;		/* attached SCSI device */
 #ifdef CONFIG_ATA_ACPI
 	acpi_handle		acpi_handle;
@@ -564,6 +570,7 @@
 	/* n_sector is used as CLEAR_OFFSET, read comment above CLEAR_OFFSET */
 	u64			n_sectors;	/* size of device, if ATA */
 	unsigned int		class;		/* ATA_DEV_xxx */
+	unsigned long		unpark_deadline;
 
 	u8			pio_mode;
 	u8			dma_mode;
@@ -621,6 +628,7 @@
 					       [ATA_EH_CMD_TIMEOUT_TABLE_SIZE];
 	unsigned int		classes[ATA_MAX_DEVICES];
 	unsigned int		did_probe_mask;
+	unsigned int		unloaded_mask;
 	unsigned int		saved_ncq_enabled;
 	u8			saved_xfer_mode[ATA_MAX_DEVICES];
 	/* timestamp for the last reset attempt or success */
@@ -688,7 +696,8 @@
 	unsigned int		qc_active;
 	int			nr_active_links; /* #links with active qcs */
 
-	struct ata_link		link;	/* host default link */
+	struct ata_link		link;		/* host default link */
+	struct ata_link		*slave_link;	/* see ata_slave_link_init() */
 
 	int			nr_pmp_links;	/* nr of available PMP links */
 	struct ata_link		*pmp_link;	/* array of PMP links */
@@ -709,6 +718,7 @@
 	struct list_head	eh_done_q;
 	wait_queue_head_t	eh_wait_q;
 	int			eh_tries;
+	struct completion	park_req_pending;
 
 	pm_message_t		pm_mesg;
 	int			*pm_result;
@@ -772,8 +782,8 @@
 	/*
 	 * Optional features
 	 */
-	int  (*scr_read)(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-	int  (*scr_write)(struct ata_port *ap, unsigned int sc_reg, u32 val);
+	int  (*scr_read)(struct ata_link *link, unsigned int sc_reg, u32 *val);
+	int  (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val);
 	void (*pmp_attach)(struct ata_port *ap);
 	void (*pmp_detach)(struct ata_port *ap);
 	int  (*enable_pm)(struct ata_port *ap, enum link_pm policy);
@@ -895,6 +905,7 @@
 extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports);
 extern struct ata_host *ata_host_alloc_pinfo(struct device *dev,
 			const struct ata_port_info * const * ppi, int n_ports);
+extern int ata_slave_link_init(struct ata_port *ap);
 extern int ata_host_start(struct ata_host *host);
 extern int ata_host_register(struct ata_host *host,
 			     struct scsi_host_template *sht);
@@ -920,8 +931,8 @@
 extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
 extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
 extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
-extern int ata_link_online(struct ata_link *link);
-extern int ata_link_offline(struct ata_link *link);
+extern bool ata_link_online(struct ata_link *link);
+extern bool ata_link_offline(struct ata_link *link);
 #ifdef CONFIG_PM
 extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg);
 extern void ata_host_resume(struct ata_host *host);
@@ -1098,6 +1109,7 @@
  */
 extern const struct ata_port_operations ata_base_port_ops;
 extern const struct ata_port_operations sata_port_ops;
+extern struct device_attribute *ata_common_sdev_attrs[];
 
 #define ATA_BASE_SHT(drv_name)					\
 	.module			= THIS_MODULE,			\
@@ -1112,7 +1124,8 @@
 	.proc_name		= drv_name,			\
 	.slave_configure	= ata_scsi_slave_config,	\
 	.slave_destroy		= ata_scsi_slave_destroy,	\
-	.bios_param		= ata_std_bios_param
+	.bios_param		= ata_std_bios_param,		\
+	.sdev_attrs		= ata_common_sdev_attrs
 
 #define ATA_NCQ_SHT(drv_name)					\
 	ATA_BASE_SHT(drv_name),					\
@@ -1134,7 +1147,7 @@
 
 static inline int ata_is_host_link(const struct ata_link *link)
 {
-	return link == &link->ap->link;
+	return link == &link->ap->link || link == link->ap->slave_link;
 }
 #else /* CONFIG_SATA_PMP */
 static inline bool sata_pmp_supported(struct ata_port *ap)
@@ -1167,7 +1180,7 @@
 	printk("%sata%u: "fmt, lv, (ap)->print_id , ##args)
 
 #define ata_link_printk(link, lv, fmt, args...) do { \
-	if (sata_pmp_attached((link)->ap)) \
+	if (sata_pmp_attached((link)->ap) || (link)->ap->slave_link)	\
 		printk("%sata%u.%02u: "fmt, lv, (link)->ap->print_id,	\
 		       (link)->pmp , ##args); \
 	else \
@@ -1265,34 +1278,17 @@
 	return ata_tag_valid(link->active_tag) || link->sactive;
 }
 
-static inline struct ata_link *ata_port_first_link(struct ata_port *ap)
-{
-	if (sata_pmp_attached(ap))
-		return ap->pmp_link;
-	return &ap->link;
-}
+extern struct ata_link *__ata_port_next_link(struct ata_port *ap,
+					     struct ata_link *link,
+					     bool dev_only);
 
-static inline struct ata_link *ata_port_next_link(struct ata_link *link)
-{
-	struct ata_port *ap = link->ap;
-
-	if (ata_is_host_link(link)) {
-		if (!sata_pmp_attached(ap))
-			return NULL;
-		return ap->pmp_link;
-	}
-
-	if (++link < ap->nr_pmp_links + ap->pmp_link)
-		return link;
-	return NULL;
-}
-
-#define __ata_port_for_each_link(lk, ap) \
-	for ((lk) = &(ap)->link; (lk); (lk) = ata_port_next_link(lk))
+#define __ata_port_for_each_link(link, ap) \
+	for ((link) = __ata_port_next_link((ap), NULL, false); (link); \
+	     (link) = __ata_port_next_link((ap), (link), false))
 
 #define ata_port_for_each_link(link, ap) \
-	for ((link) = ata_port_first_link(ap); (link); \
-	     (link) = ata_port_next_link(link))
+	for ((link) = __ata_port_next_link((ap), NULL, true); (link); \
+	     (link) = __ata_port_next_link((ap), (link), true))
 
 #define ata_link_for_each_dev(dev, link) \
 	for ((dev) = (link)->device; \
diff --git a/include/linux/major.h b/include/linux/major.h
index 53d5faf..8824945 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -170,4 +170,6 @@
 
 #define VIOTAPE_MAJOR		230
 
+#define BLOCK_EXT_MAJOR		259
+
 #endif
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index 310e616..8b4aa05 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -41,6 +41,8 @@
 		    unsigned long block, char *buffer);
 	int (*writesect)(struct mtd_blktrans_dev *dev,
 		     unsigned long block, char *buffer);
+	int (*discard)(struct mtd_blktrans_dev *dev,
+		       unsigned long block, unsigned nr_blocks);
 
 	/* Block layer ioctls */
 	int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
new file mode 100644
index 0000000..a3eb2f6
--- /dev/null
+++ b/include/linux/string_helpers.h
@@ -0,0 +1,16 @@
+#ifndef _LINUX_STRING_HELPERS_H_
+#define _LINUX_STRING_HELPERS_H_
+
+#include <linux/types.h>
+
+/* Descriptions of the types of units to
+ * print in */
+enum string_size_units {
+	STRING_UNITS_10,	/* use powers of 10^3 (standard SI) */
+	STRING_UNITS_2,		/* use binary powers of 2^10 */
+};
+
+int string_get_size(u64 size, enum string_size_units units,
+		    char *buf, int len);
+
+#endif
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index f9f6e79..855bf95 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -75,7 +75,6 @@
 
 	int retries;
 	int allowed;
-	int timeout_per_command;
 
 	unsigned char prot_op;
 	unsigned char prot_type;
@@ -86,7 +85,6 @@
 	/* These elements define the operation we are about to perform */
 	unsigned char *cmnd;
 
-	struct timer_list eh_timeout;	/* Used to time out the command. */
 
 	/* These elements define the operation we ultimately want to perform */
 	struct scsi_data_buffer sdb;
@@ -139,7 +137,6 @@
 extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *,
 			       struct device *);
 extern void scsi_finish_command(struct scsi_cmnd *cmd);
-extern void scsi_req_abort_cmd(struct scsi_cmnd *cmd);
 
 extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 				 size_t *offset, size_t *len);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 80b2e93..b49e725 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -42,9 +42,11 @@
 				 * originate in the mid-layer) */
 	SDEV_OFFLINE,		/* Device offlined (by error handling or
 				 * user request */
-	SDEV_BLOCK,		/* Device blocked by scsi lld.  No scsi 
-				 * commands from user or midlayer should be issued
-				 * to the scsi lld. */
+	SDEV_BLOCK,		/* Device blocked by scsi lld.  No
+				 * scsi commands from user or midlayer
+				 * should be issued to the scsi
+				 * lld. */
+	SDEV_CREATED_BLOCK,	/* same as above but for created devices */
 };
 
 enum scsi_device_event {
@@ -384,10 +386,23 @@
 #define scmd_id(scmd) sdev_id((scmd)->device)
 #define scmd_channel(scmd) sdev_channel((scmd)->device)
 
+/*
+ * checks for positions of the SCSI state machine
+ */
 static inline int scsi_device_online(struct scsi_device *sdev)
 {
 	return sdev->sdev_state != SDEV_OFFLINE;
 }
+static inline int scsi_device_blocked(struct scsi_device *sdev)
+{
+	return sdev->sdev_state == SDEV_BLOCK ||
+		sdev->sdev_state == SDEV_CREATED_BLOCK;
+}
+static inline int scsi_device_created(struct scsi_device *sdev)
+{
+	return sdev->sdev_state == SDEV_CREATED ||
+		sdev->sdev_state == SDEV_CREATED_BLOCK;
+}
 
 /* accessor functions for the SCSI parameters */
 static inline int scsi_device_sync(struct scsi_device *sdev)
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 44a55d1..d123ca8 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -43,13 +43,6 @@
 #define DISABLE_CLUSTERING 0
 #define ENABLE_CLUSTERING 1
 
-enum scsi_eh_timer_return {
-	EH_NOT_HANDLED,
-	EH_HANDLED,
-	EH_RESET_TIMER,
-};
-
-
 struct scsi_host_template {
 	struct module *module;
 	const char *name;
@@ -347,7 +340,7 @@
 	 *
 	 * Status: OPTIONAL
 	 */
-	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
 
 	/*
 	 * Name of proc directory
diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h
index 8c1470c..536752c 100644
--- a/include/scsi/scsi_netlink.h
+++ b/include/scsi/scsi_netlink.h
@@ -22,6 +22,9 @@
 #ifndef SCSI_NETLINK_H
 #define SCSI_NETLINK_H
 
+#include <linux/netlink.h>
+
+
 /*
  * This file intended to be included by both kernel and user space
  */
@@ -55,7 +58,41 @@
 #define SCSI_NL_TRANSPORT_FC			1
 #define SCSI_NL_MAX_TRANSPORTS			2
 
-/* scsi_nl_hdr->msgtype values are defined in each transport */
+/* Transport-based scsi_nl_hdr->msgtype values are defined in each transport */
+
+/*
+ * GENERIC SCSI scsi_nl_hdr->msgtype Values
+ */
+	/* kernel -> user */
+#define SCSI_NL_SHOST_VENDOR			0x0001
+	/* user -> kernel */
+/* SCSI_NL_SHOST_VENDOR msgtype is kernel->user and user->kernel */
+
+
+/*
+ * Message Structures :
+ */
+
+/* macro to round up message lengths to 8byte boundary */
+#define SCSI_NL_MSGALIGN(len)		(((len) + 7) & ~7)
+
+
+/*
+ * SCSI HOST Vendor Unique messages :
+ *   SCSI_NL_SHOST_VENDOR
+ *
+ * Note: The Vendor Unique message payload will begin directly after
+ * 	 this structure, with the length of the payload per vmsg_datalen.
+ *
+ * Note: When specifying vendor_id, be sure to read the Vendor Type and ID
+ *   formatting requirements specified below
+ */
+struct scsi_nl_host_vendor_msg {
+	struct scsi_nl_hdr snlh;		/* must be 1st element ! */
+	uint64_t vendor_id;
+	uint16_t host_no;
+	uint16_t vmsg_datalen;
+} __attribute__((aligned(sizeof(uint64_t))));
 
 
 /*
@@ -83,5 +120,28 @@
 	}
 
 
+#ifdef __KERNEL__
+
+#include <scsi/scsi_host.h>
+
+/* Exported Kernel Interfaces */
+int scsi_nl_add_transport(u8 tport,
+	 int (*msg_handler)(struct sk_buff *),
+	void (*event_handler)(struct notifier_block *, unsigned long, void *));
+void scsi_nl_remove_transport(u8 tport);
+
+int scsi_nl_add_driver(u64 vendor_id, struct scsi_host_template *hostt,
+	int (*nlmsg_handler)(struct Scsi_Host *shost, void *payload,
+				 u32 len, u32 pid),
+	void (*nlevt_handler)(struct notifier_block *nb,
+				 unsigned long event, void *notify_ptr));
+void scsi_nl_remove_driver(u64 vendor_id);
+
+void scsi_nl_send_transport_msg(u32 pid, struct scsi_nl_hdr *hdr);
+int scsi_nl_send_vendor_msg(u32 pid, unsigned short host_no, u64 vendor_id,
+			 char *data_buf, u32 data_len);
+
+#endif /* __KERNEL__ */
+
 #endif /* SCSI_NETLINK_H */
 
diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h
index 490bd13..0de32cd 100644
--- a/include/scsi/scsi_transport.h
+++ b/include/scsi/scsi_transport.h
@@ -21,6 +21,7 @@
 #define SCSI_TRANSPORT_H
 
 #include <linux/transport_class.h>
+#include <linux/blkdev.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 
@@ -64,7 +65,7 @@
 	 *			begin counting again
 	 * EH_NOT_HANDLED	Begin normal error recovery
 	 */
-	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
 
 	/*
 	 * Used as callback for the completion of i_t_nexus request
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 878373c..21018a4 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -167,6 +167,26 @@
 struct device_attribute dev_attr_vport_##_name = 	\
 	__ATTR(_name,_mode,_show,_store)
 
+/*
+ * fc_vport_identifiers: This set of data contains all elements
+ * to uniquely identify and instantiate a FC virtual port.
+ *
+ * Notes:
+ *   symbolic_name: The driver is to append the symbolic_name string data
+ *      to the symbolic_node_name data that it generates by default.
+ *      the resulting combination should then be registered with the switch.
+ *      It is expected that things like Xen may stuff a VM title into
+ *      this field.
+ */
+#define FC_VPORT_SYMBOLIC_NAMELEN		64
+struct fc_vport_identifiers {
+	u64 node_name;
+	u64 port_name;
+	u32 roles;
+	bool disable;
+	enum fc_port_type vport_type;	/* only FC_PORTTYPE_NPIV allowed */
+	char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN];
+};
 
 /*
  * FC Virtual Port Attributes
@@ -197,7 +217,6 @@
  * managed by the transport w/o driver interaction.
  */
 
-#define FC_VPORT_SYMBOLIC_NAMELEN		64
 struct fc_vport {
 	/* Fixed Attributes */
 
@@ -732,6 +751,8 @@
 	 *   be sure to read the Vendor Type and ID formatting requirements
 	 *   specified in scsi_netlink.h
 	 */
+struct fc_vport *fc_vport_create(struct Scsi_Host *shost, int channel,
+		struct fc_vport_identifiers *);
 int fc_vport_terminate(struct fc_vport *vport);
 
 #endif /* SCSI_TRANSPORT_FC_H */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 3715feb..d055b19 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -263,6 +263,10 @@
 		printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
 
 		printk_all_partitions();
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+		printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
+		       "explicit textual name for \"root=\" boot option.\n");
+#endif
 		panic("VFS: Unable to mount root fs on %s", b);
 	}
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b50481..7d7a31d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -624,6 +624,28 @@
 
 	  Say N if you are unsure.
 
+config DEBUG_BLOCK_EXT_DEVT
+        bool "Force extended block device numbers and spread them"
+	depends on DEBUG_KERNEL
+	depends on BLOCK
+	default n
+	help
+	  Conventionally, block device numbers are allocated from
+	  predetermined contiguous area.  However, extended block area
+	  may introduce non-contiguous block device numbers.  This
+	  option forces most block device numbers to be allocated from
+	  the extended space and spreads them to discover kernel or
+	  userland code paths which assume predetermined contiguous
+	  device number allocation.
+
+	  Note that turning on this debug option shuffles all the
+	  device numbers for all IDE and SCSI devices including libata
+	  ones, so root partition specified using device number
+	  directly (via rdev or root=MAJ:MIN) won't work anymore.
+	  Textual device names (root=/dev/sdXn) will continue to work.
+
+	  Say N if you are unsure.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
 	depends on DEBUG_KERNEL
@@ -661,10 +683,21 @@
 
 config FAIL_MAKE_REQUEST
 	bool "Fault-injection capability for disk IO"
-	depends on FAULT_INJECTION
+	depends on FAULT_INJECTION && BLOCK
 	help
 	  Provide fault-injection capability for disk IO.
 
+config FAIL_IO_TIMEOUT
+	bool "Faul-injection capability for faking disk interrupts"
+	depends on FAULT_INJECTION && BLOCK
+	help
+	  Provide fault-injection capability on end IO handling. This
+	  will make the block layer "forget" an interrupt as configured,
+	  thus exercising the error handling.
+
+	  Only works with drivers that use the generic timeout handling,
+	  for others it wont do anything.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
diff --git a/lib/Makefile b/lib/Makefile
index 3b1f94b..44001af 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,7 +19,8 @@
 lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
-	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o
+	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
+	 string_helpers.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/klist.c b/lib/klist.c
index cca37f9..bbdd301 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -37,6 +37,37 @@
 #include <linux/klist.h>
 #include <linux/module.h>
 
+/*
+ * Use the lowest bit of n_klist to mark deleted nodes and exclude
+ * dead ones from iteration.
+ */
+#define KNODE_DEAD		1LU
+#define KNODE_KLIST_MASK	~KNODE_DEAD
+
+static struct klist *knode_klist(struct klist_node *knode)
+{
+	return (struct klist *)
+		((unsigned long)knode->n_klist & KNODE_KLIST_MASK);
+}
+
+static bool knode_dead(struct klist_node *knode)
+{
+	return (unsigned long)knode->n_klist & KNODE_DEAD;
+}
+
+static void knode_set_klist(struct klist_node *knode, struct klist *klist)
+{
+	knode->n_klist = klist;
+	/* no knode deserves to start its life dead */
+	WARN_ON(knode_dead(knode));
+}
+
+static void knode_kill(struct klist_node *knode)
+{
+	/* and no knode should die twice ever either, see we're very humane */
+	WARN_ON(knode_dead(knode));
+	*(unsigned long *)&knode->n_klist |= KNODE_DEAD;
+}
 
 /**
  * klist_init - Initialize a klist structure.
@@ -79,7 +110,7 @@
 	INIT_LIST_HEAD(&n->n_node);
 	init_completion(&n->n_removed);
 	kref_init(&n->n_ref);
-	n->n_klist = k;
+	knode_set_klist(n, k);
 	if (k->get)
 		k->get(n);
 }
@@ -115,7 +146,7 @@
  */
 void klist_add_after(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -131,7 +162,7 @@
  */
 void klist_add_before(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -144,9 +175,10 @@
 {
 	struct klist_node *n = container_of(kref, struct klist_node, n_ref);
 
+	WARN_ON(!knode_dead(n));
 	list_del(&n->n_node);
 	complete(&n->n_removed);
-	n->n_klist = NULL;
+	knode_set_klist(n, NULL);
 }
 
 static int klist_dec_and_del(struct klist_node *n)
@@ -154,21 +186,28 @@
 	return kref_put(&n->n_ref, klist_release);
 }
 
+static void klist_put(struct klist_node *n, bool kill)
+{
+	struct klist *k = knode_klist(n);
+	void (*put)(struct klist_node *) = k->put;
+
+	spin_lock(&k->k_lock);
+	if (kill)
+		knode_kill(n);
+	if (!klist_dec_and_del(n))
+		put = NULL;
+	spin_unlock(&k->k_lock);
+	if (put)
+		put(n);
+}
+
 /**
  * klist_del - Decrement the reference count of node and try to remove.
  * @n: node we're deleting.
  */
 void klist_del(struct klist_node *n)
 {
-	struct klist *k = n->n_klist;
-	void (*put)(struct klist_node *) = k->put;
-
-	spin_lock(&k->k_lock);
-	if (!klist_dec_and_del(n))
-		put = NULL;
-	spin_unlock(&k->k_lock);
-	if (put)
-		put(n);
+	klist_put(n, true);
 }
 EXPORT_SYMBOL_GPL(klist_del);
 
@@ -206,7 +245,6 @@
 			  struct klist_node *n)
 {
 	i->i_klist = k;
-	i->i_head = &k->k_list;
 	i->i_cur = n;
 	if (n)
 		kref_get(&n->n_ref);
@@ -237,7 +275,7 @@
 void klist_iter_exit(struct klist_iter *i)
 {
 	if (i->i_cur) {
-		klist_del(i->i_cur);
+		klist_put(i->i_cur, false);
 		i->i_cur = NULL;
 	}
 }
@@ -258,27 +296,33 @@
  */
 struct klist_node *klist_next(struct klist_iter *i)
 {
-	struct list_head *next;
-	struct klist_node *lnode = i->i_cur;
-	struct klist_node *knode = NULL;
 	void (*put)(struct klist_node *) = i->i_klist->put;
+	struct klist_node *last = i->i_cur;
+	struct klist_node *next;
 
 	spin_lock(&i->i_klist->k_lock);
-	if (lnode) {
-		next = lnode->n_node.next;
-		if (!klist_dec_and_del(lnode))
+
+	if (last) {
+		next = to_klist_node(last->n_node.next);
+		if (!klist_dec_and_del(last))
 			put = NULL;
 	} else
-		next = i->i_head->next;
+		next = to_klist_node(i->i_klist->k_list.next);
 
-	if (next != i->i_head) {
-		knode = to_klist_node(next);
-		kref_get(&knode->n_ref);
+	i->i_cur = NULL;
+	while (next != to_klist_node(&i->i_klist->k_list)) {
+		if (likely(!knode_dead(next))) {
+			kref_get(&next->n_ref);
+			i->i_cur = next;
+			break;
+		}
+		next = to_klist_node(next->n_node.next);
 	}
-	i->i_cur = knode;
+
 	spin_unlock(&i->i_klist->k_lock);
-	if (put && lnode)
-		put(lnode);
-	return knode;
+
+	if (put && last)
+		put(last);
+	return i->i_cur;
 }
 EXPORT_SYMBOL_GPL(klist_next);
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
new file mode 100644
index 0000000..8347925
--- /dev/null
+++ b/lib/string_helpers.c
@@ -0,0 +1,64 @@
+/*
+ * Helpers for formatting and printing strings
+ *
+ * Copyright 31 August 2008 James Bottomley
+ */
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/string_helpers.h>
+
+/**
+ * string_get_size - get the size in the specified units
+ * @size:	The size to be converted
+ * @units:	units to use (powers of 1000 or 1024)
+ * @buf:	buffer to format to
+ * @len:	length of buffer
+ *
+ * This function returns a string formatted to 3 significant figures
+ * giving the size in the required units.  Returns 0 on success or
+ * error on failure.  @buf is always zero terminated.
+ *
+ */
+int string_get_size(u64 size, const enum string_size_units units,
+		    char *buf, int len)
+{
+	const char *units_10[] = { "B", "KB", "MB", "GB", "TB", "PB",
+				   "EB", "ZB", "YB", NULL};
+	const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
+				 "EiB", "ZiB", "YiB", NULL };
+	const char **units_str[] = {
+		[STRING_UNITS_10] =  units_10,
+		[STRING_UNITS_2] = units_2,
+	};
+	const int divisor[] = {
+		[STRING_UNITS_10] = 1000,
+		[STRING_UNITS_2] = 1024,
+	};
+	int i, j;
+	u64 remainder = 0, sf_cap;
+	char tmp[8];
+
+	tmp[0] = '\0';
+
+	for (i = 0; size > divisor[units] && units_str[units][i]; i++)
+		remainder = do_div(size, divisor[units]);
+
+	sf_cap = size;
+	for (j = 0; sf_cap*10 < 1000; j++)
+		sf_cap *= 10;
+
+	if (j) {
+		remainder *= 1000;
+		do_div(remainder, divisor[units]);
+		snprintf(tmp, sizeof(tmp), ".%03lld",
+			 (unsigned long long)remainder);
+		tmp[j+1] = '\0';
+	}
+
+	snprintf(buf, len, "%lld%s%s", (unsigned long long)size,
+		 tmp, units_str[units][i]);
+
+	return 0;
+}
+EXPORT_SYMBOL(string_get_size);
diff --git a/mm/bounce.c b/mm/bounce.c
index b6d2d0f..06722c4 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -267,7 +267,7 @@
 	/*
 	 * Data-less bio, nothing to bounce
 	 */
-	if (bio_empty_barrier(*bio_orig))
+	if (!bio_has_data(*bio_orig))
 		return;
 
 	/*