Merge git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6 * git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6: (37 commits) [SCSI] zfcp: fix double dbf id usage [SCSI] zfcp: wait on SCSI work to be finished before proceeding with init dev [SCSI] zfcp: fix erp list usage without using locks [SCSI] zfcp: prevent fc_remote_port_delete calls for unregistered rport [SCSI] zfcp: fix deadlock caused by shared work queue tasks [SCSI] zfcp: put threshold data in hba trace [SCSI] zfcp: Simplify zfcp data structures [SCSI] zfcp: Simplify get_adapter_by_busid [SCSI] zfcp: remove all typedefs and replace them with standards [SCSI] zfcp: attach and release SAN nameserver port on demand [SCSI] zfcp: remove unused references, declarations and flags [SCSI] zfcp: Update message with input from review [SCSI] zfcp: add queue_full sysfs attribute [SCSI] scsi_dh: suppress comparison warning [SCSI] scsi_dh: add Dell product information into rdac device handler [SCSI] qla2xxx: remove the unused SCSI_QLOGIC_FC_FIRMWARE option [SCSI] qla2xxx: fix printk format warnings [SCSI] qla2xxx: Update version number to 8.02.01-k8. [SCSI] qla2xxx: Ignore payload reserved-bits during RSCN processing. [SCSI] qla2xxx: Additional residual-count corrections during UNDERRUN handling. ...

commit: ef5bef357cdf49f3a386c7102dbf3be5f7e5c913 [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Fri Oct 10 10:53:26 2008 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Fri Oct 10 10:53:26 2008 -0700
tree: 48d9dc86768e3e146267ea21d0c898f9008275a1
parent: e26feff647ef34423b048b940540a0059001ddb0 [diff]
parent: 41bfcf90101601f9507240ff0435c1b73d28a132 [diff]
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 5b5aba4..7306081 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX

@@ -251,8 +251,6 @@
 	- how to execute Mono-based .NET binaries with the help of BINFMT_MISC.
 moxa-smartio
 	- file with info on installing/using Moxa multiport serial driver.
-mtrr.txt
-	- how to use PPro Memory Type Range Registers to increase performance.
 mutex-design.txt
 	- info on the generic mutex subsystem.
 namespaces/

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index d8b63d1..b8e8646 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt

@@ -337,7 +337,7 @@
 	int i, count = dma_map_sg(dev, sglist, nents, direction);
 	struct scatterlist *sg;
 
-	for (i = 0, sg = sglist; i < count; i++, sg++) {
+	for_each_sg(sglist, sg, count, i) {
 		hw_address[i] = sg_dma_address(sg);
 		hw_len[i] = sg_dma_len(sg);
 	}

diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index b7b1482..f5696ba 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl

@@ -364,6 +364,10 @@
 !Eblock/blk-barrier.c
 !Eblock/blk-tag.c
 !Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Iblock/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
   </chapter>
 
   <chapter id="chrdev">

diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index c2371c5..48a3955 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO

@@ -77,7 +77,8 @@
 When a kernel change causes the interface that the kernel exposes to
 userspace to change, it is recommended that you send the information or
 a patch to the manual pages explaining the change to the manual pages
-maintainer at mtk.manpages@gmail.com.
+maintainer at mtk.manpages@gmail.com, and CC the list
+linux-api@vger.kernel.org.
 
 Here is a list of files that are in the kernel source tree that are
 required reading:

diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index da10e07..21f0795 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist

@@ -67,6 +67,8 @@
 
 19: All new userspace interfaces are documented in Documentation/ABI/.
     See Documentation/ABI/README for more information.
+    Patches that change userspace interfaces should be CCed to
+    linux-api@vger.kernel.org.
 
 20: Check that it all passes `make headers_check'.
 

diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
index c23cab1..7257676 100644
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.txt

@@ -30,12 +30,18 @@
 Similar to read_expire mentioned above, but for writes.
 
 
-fifo_batch
+fifo_batch	(number of requests)
 ----------
 
-When a read request expires its deadline, we must move some requests from
-the sorted io scheduler list to the block device dispatch queue. fifo_batch
-controls how many requests we move.
+Requests are grouped into ``batches'' of a particular data direction (read or
+write) which are serviced in increasing sector order.  To limit extra seeking,
+deadline expiries are only checked between batches.  fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput.  When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour).  Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.
 
 
 writes_starved	(number of dispatches)

diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd
index 91c0dcc..2c558cd 100644
--- a/Documentation/cdrom/ide-cd
+++ b/Documentation/cdrom/ide-cd

@@ -145,8 +145,7 @@
 
 To play an audio CD, you should first unmount and remove any data
 CDROM.  Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).  Lacking anything else, you could use the
-cdtester program in Documentation/cdrom/sbpcd.
+workbone, cdplayer, etc.).
 
 On a few drives, you can read digital audio directly using a program
 such as cdda2wav.  The only types of drive which I've heard support

diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt
index ffdb532..3d0b915 100644
--- a/Documentation/cpu-freq/index.txt
+++ b/Documentation/cpu-freq/index.txt

@@ -35,11 +35,9 @@
 ------------
 There is a CPU frequency changing CVS commit and general list where
 you can report bugs, problems or submit patches. To post a message,
-send an email to cpufreq@lists.linux.org.uk, to subscribe go to
-http://lists.linux.org.uk/mailman/listinfo/cpufreq. Previous post to the
-mailing list are available to subscribers at
-http://lists.linux.org.uk/mailman/private/cpufreq/.
-
+send an email to cpufreq@vger.kernel.org, to subscribe go to
+http://vger.kernel.org/vger-lists.html#cpufreq and follow the
+instructions there.
 
 Links
 -----
@@ -50,7 +48,7 @@
 * http://cvs.arm.linux.org.uk/
 
 the CPUFreq Mailing list:
-* http://lists.linux.org.uk/mailman/listinfo/cpufreq
+* http://vger.kernel.org/vger-lists.html#cpufreq
 
 Clock and voltage scaling for the SA-1100:
 * http://www.lartmaker.nl/projects/scaling

diff --git a/Documentation/hwmon/adt7473 b/Documentation/hwmon/adt7473
index 2126de3..1cbf671 100644
--- a/Documentation/hwmon/adt7473
+++ b/Documentation/hwmon/adt7473

@@ -14,14 +14,14 @@
 
 This driver implements support for the Analog Devices ADT7473 chip family.
 
-The LM85 uses the 2-wire interface compatible with the SMBUS 2.0
+The ADT7473 uses the 2-wire interface compatible with the SMBUS 2.0
 specification. Using an analog to digital converter it measures three (3)
-temperatures and two (2) voltages. It has three (3) 16-bit counters for
+temperatures and two (2) voltages. It has four (4) 16-bit counters for
 measuring fan speed. There are three (3) PWM outputs that can be used
 to control fan speed.
 
 A sophisticated control system for the PWM outputs is designed into the
-LM85 that allows fan speed to be adjusted automatically based on any of the
+ADT7473 that allows fan speed to be adjusted automatically based on any of the
 three temperature sensors. Each PWM output is individually adjustable and
 programmable. Once configured, the ADT7473 will adjust the PWM outputs in
 response to the measured temperatures without further host intervention.
@@ -46,14 +46,6 @@
 The Analog Devices datasheet is very detailed and describes a procedure for
 determining an optimal configuration for the automatic PWM control.
 
-Hardware Configurations
------------------------
-
-The ADT7473 chips have an optional SMBALERT output that can be used to
-signal the chipset in case a limit is exceeded or the temperature sensors
-fail. Individual sensor interrupts can be masked so they won't trigger
-SMBALERT. The SMBALERT output if configured replaces the PWM2 function.
-
 Configuration Notes
 -------------------
 
@@ -61,8 +53,8 @@
 
 * PWM Control
 
-* pwm#_auto_point1_pwm and pwm#_auto_point1_temp and
-* pwm#_auto_point2_pwm and pwm#_auto_point2_temp -
+* pwm#_auto_point1_pwm and temp#_auto_point1_temp and
+* pwm#_auto_point2_pwm and temp#_auto_point2_temp -
 
 point1: Set the pwm speed at a lower temperature bound.
 point2: Set the pwm speed at a higher temperature bound.

diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index 2d84573..6dbfd5e 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface

@@ -329,6 +329,10 @@
 				Unit: microWatt
 				RO
 
+power[1-*]_average_interval	Power use averaging interval
+				Unit: milliseconds
+				RW
+
 power[1-*]_average_highest	Historical average maximum power use
 				Unit: microWatt
 				RO
@@ -354,6 +358,14 @@
 				WO
 
 **********
+* Energy *
+**********
+
+energy[1-*]_input		Cumulative energy use
+				Unit: microJoule
+				RO
+
+**********
 * Alarms *
 **********
 

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1150444..329dcab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt

@@ -463,12 +463,6 @@
 			Range: 0 - 8192
 			Default: 64
 
-	disable_8254_timer
-	enable_8254_timer
-			[IA32/X86_64] Disable/Enable interrupt 0 timer routing
-			over the 8254 in addition to over the IO-APIC. The
-			kernel tries to set a sensible default.
-
 	hpet=		[X86-32,HPET] option to control HPET usage
 			Format: { enable (default) | disable | force }
 			disable: disable HPET and use PIT instead
@@ -1882,6 +1876,12 @@
 	shapers=	[NET]
 			Maximal number of shapers.
 
+	show_msr=	[x86] show boot-time MSR settings
+			Format: { <integer> }
+			Show boot-time (BIOS-initialized) MSR settings.
+			The parameter means the number of CPUs to show,
+			for example 1 means boot CPU only.
+
 	sim710=		[SCSI,HW]
 			See header of drivers/scsi/sim710.c.
 

diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx
index 89c7f32..53449cb 100644
--- a/Documentation/video4linux/CARDLIST.em28xx
+++ b/Documentation/video4linux/CARDLIST.em28xx

@@ -46,7 +46,7 @@
  45 -> Pinnacle PCTV DVB-T                      (em2870)
  46 -> Compro, VideoMate U3                     (em2870)        [185b:2870]
  47 -> KWorld DVB-T 305U                        (em2880)        [eb1a:e305]
- 48 -> KWorld DVB-T 310U                        (em2880)
+ 48 -> KWorld DVB-T 310U                        (em2880)        [eb1a:e310]
  49 -> MSI DigiVox A/D                          (em2880)        [eb1a:e310]
  50 -> MSI DigiVox A/D II                       (em2880)        [eb1a:e320]
  51 -> Terratec Hybrid XS Secam                 (em2880)        [0ccd:004c]

diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
index 0f03900..9a3e4d7 100644
--- a/Documentation/video4linux/gspca.txt
+++ b/Documentation/video4linux/gspca.txt

@@ -190,6 +190,7 @@
 pac7311		093a:2621	PAC731x
 pac7311		093a:2624	PAC7302
 pac7311		093a:2626	Labtec 2200
+pac7311		093a:262a	Webcam 300k
 zc3xx		0ac8:0302	Z-star Vimicro zc0302
 vc032x		0ac8:0321	Vimicro generic vc0321
 vc032x		0ac8:0323	Vimicro Vc0323

diff --git a/Documentation/x86/00-INDEX b/Documentation/x86/00-INDEX
new file mode 100644
index 0000000..dbe3377
--- /dev/null
+++ b/Documentation/x86/00-INDEX

@@ -0,0 +1,4 @@
+00-INDEX
+	- this file
+mtrr.txt
+	- how to use x86 Memory Type Range Registers to increase performance

diff --git a/Documentation/x86/i386/boot.txt b/Documentation/x86/boot.txt
similarity index 99%
rename from Documentation/x86/i386/boot.txt
rename to Documentation/x86/boot.txt
index 147bfe5..83c0033 100644
--- a/Documentation/x86/i386/boot.txt
+++ b/Documentation/x86/boot.txt

@@ -308,7 +308,7 @@
 
 Field name:	start_sys
 Type:		read
-Offset/size:	0x20c/4
+Offset/size:	0x20c/2
 Protocol:	2.00+
 
   The load low segment (0x1000).  Obsolete.

diff --git a/Documentation/mtrr.txt b/Documentation/x86/mtrr.txt
similarity index 99%
rename from Documentation/mtrr.txt
rename to Documentation/x86/mtrr.txt
index c39ac39..cc071dc 100644
--- a/Documentation/mtrr.txt
+++ b/Documentation/x86/mtrr.txt

@@ -18,7 +18,7 @@
   The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
   MTRRs. These are supported.  The AMD Athlon family provide 8 Intel
   style MTRRs.
-  
+
   The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
   are supported.
 
@@ -87,7 +87,7 @@
 reg01: base=0xfb000000 (4016MB), size=  16MB: write-combining, count=1
 reg02: base=0xfb000000 (4016MB), size=   4kB: uncachable, count=1
 
-Some cards (especially Voodoo Graphics boards) need this 4 kB area 
+Some cards (especially Voodoo Graphics boards) need this 4 kB area
 excluded from the beginning of the region because it is used for
 registers.
 

diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 17965f9..c93ff5f 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt

@@ -14,6 +14,10 @@
 ones that will be supported at this time are Write-back, Uncached,
 Write-combined and Uncached Minus.
 
+
+PAT APIs
+--------
+
 There are many different APIs in the kernel that allows setting of memory
 attributes at the page level. In order to avoid aliasing, these interfaces
 should be used thoughtfully. Below is a table of interfaces available,
@@ -26,38 +30,38 @@
 API                    |    RAM   |  ACPI,...  |  Reserved/Holes  |
 -----------------------|----------|------------|------------------|
                        |          |            |                  |
-ioremap                |    --    |    UC      |       UC         |
+ioremap                |    --    |    UC-     |       UC-        |
                        |          |            |                  |
 ioremap_cache          |    --    |    WB      |       WB         |
                        |          |            |                  |
-ioremap_nocache        |    --    |    UC      |       UC         |
+ioremap_nocache        |    --    |    UC-     |       UC-        |
                        |          |            |                  |
 ioremap_wc             |    --    |    --      |       WC         |
                        |          |            |                  |
-set_memory_uc          |    UC    |    --      |       --         |
+set_memory_uc          |    UC-   |    --      |       --         |
  set_memory_wb         |          |            |                  |
                        |          |            |                  |
 set_memory_wc          |    WC    |    --      |       --         |
  set_memory_wb         |          |            |                  |
                        |          |            |                  |
-pci sysfs resource     |    --    |    --      |       UC         |
+pci sysfs resource     |    --    |    --      |       UC-        |
                        |          |            |                  |
 pci sysfs resource_wc  |    --    |    --      |       WC         |
  is IORESOURCE_PREFETCH|          |            |                  |
                        |          |            |                  |
-pci proc               |    --    |    --      |       UC         |
+pci proc               |    --    |    --      |       UC-        |
  !PCIIOC_WRITE_COMBINE |          |            |                  |
                        |          |            |                  |
 pci proc               |    --    |    --      |       WC         |
  PCIIOC_WRITE_COMBINE  |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    UC      |       UC         |
+/dev/mem               |    --    |  WB/WC/UC- |    WB/WC/UC-     |
  read-write            |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    UC      |       UC         |
+/dev/mem               |    --    |    UC-     |       UC-        |
  mmap SYNC flag        |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |  WB/WC/UC  |    WB/WC/UC      |
+/dev/mem               |    --    |  WB/WC/UC- |    WB/WC/UC-     |
  mmap !SYNC flag       |          |(from exist-|  (from exist-    |
  and                   |          |  ing alias)|    ing alias)    |
  any alias to this area|          |            |                  |
@@ -68,7 +72,7 @@
  and                   |          |            |                  |
  MTRR says WB          |          |            |                  |
                        |          |            |                  |
-/dev/mem               |    --    |    --      |    UC_MINUS      |
+/dev/mem               |    --    |    --      |       UC-        |
  mmap !SYNC flag       |          |            |                  |
  no alias to this area |          |            |                  |
  and                   |          |            |                  |
@@ -98,3 +102,35 @@
 
 Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
 
+
+PAT debugging
+-------------
+
+With CONFIG_DEBUG_FS enabled, the PAT memtype list can be examined by
+
+# mount -t debugfs debugfs /sys/kernel/debug
+# cat /sys/kernel/debug/x86/pat_memtype_list
+PAT memtype list:
+uncached-minus @ 0x7fadf000-0x7fae0000
+uncached-minus @ 0x7fb19000-0x7fb1a000
+uncached-minus @ 0x7fb1a000-0x7fb1b000
+uncached-minus @ 0x7fb1b000-0x7fb1c000
+uncached-minus @ 0x7fb1c000-0x7fb1d000
+uncached-minus @ 0x7fb1d000-0x7fb1e000
+uncached-minus @ 0x7fb1e000-0x7fb25000
+uncached-minus @ 0x7fb25000-0x7fb26000
+uncached-minus @ 0x7fb26000-0x7fb27000
+uncached-minus @ 0x7fb27000-0x7fb28000
+uncached-minus @ 0x7fb28000-0x7fb2e000
+uncached-minus @ 0x7fb2e000-0x7fb2f000
+uncached-minus @ 0x7fb2f000-0x7fb30000
+uncached-minus @ 0x7fb31000-0x7fb32000
+uncached-minus @ 0x80000000-0x90000000
+
+This list shows physical address ranges and various PAT settings used to
+access those physical address ranges.
+
+Another, more verbose way of getting PAT-related debug messages is with the
+"debugpat" boot parameter. With this parameter, various debug messages are
+printed to the dmesg log.
+

diff --git a/Documentation/x86/i386/usb-legacy-support.txt b/Documentation/x86/usb-legacy-support.txt
similarity index 100%
rename from Documentation/x86/i386/usb-legacy-support.txt
rename to Documentation/x86/usb-legacy-support.txt


diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index b0c7b6c..72ffb53 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt

@@ -54,10 +54,6 @@
 		 apicmaintimer. Useful when your PIT timer is totally
 		 broken.
 
-   disable_8254_timer / enable_8254_timer
-		 Enable interrupt 0 timer routing over the 8254 in addition to over
-	         the IO-APIC. The kernel tries to set a sensible default.
-
 Early Console
 
    syntax: earlyprintk=vga

diff --git a/Documentation/x86/i386/zero-page.txt b/Documentation/x86/zero-page.txt
similarity index 100%
rename from Documentation/x86/i386/zero-page.txt
rename to Documentation/x86/zero-page.txt


diff --git a/MAINTAINERS b/MAINTAINERS
index 3596d17..8dae455 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -1198,9 +1198,7 @@
 S:	Maintained
 
 CPUSETS
-P:	Paul Jackson
 P:	Paul Menage
-M:	pj@sgi.com
 M:	menage@google.com
 L:	linux-kernel@vger.kernel.org
 W:	http://www.bullopensource.org/cpuset/
@@ -2706,6 +2704,7 @@
 P:	Michael Kerrisk
 M:	mtk.manpages@gmail.com
 W:	http://www.kernel.org/doc/man-pages
+L:	linux-man@vger.kernel.org
 S:	Supported
 
 MARVELL LIBERTAS WIRELESS DRIVER

diff --git a/Makefile b/Makefile
index 1d03c16..16e3fbb 100644
--- a/Makefile
+++ b/Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 27
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Rotary Wombat
 
 # *DOCUMENTATION*

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 49896a2..1e06d23 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig

@@ -211,6 +211,7 @@
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_LITTLE_ENDIAN
+	select SYS_SUPPORTS_MIPS_CMP if BROKEN	# because SYNC_R4K is broken
 	select SYS_SUPPORTS_MULTITHREADING
 	select SYS_SUPPORTS_SMARTMIPS
 	help
@@ -1403,7 +1404,6 @@
 	depends on CPU_MIPS32_R2
 	#depends on CPU_MIPS64_R2		# once there is hardware ...
 	depends on SYS_SUPPORTS_MULTITHREADING
-	select GENERIC_CLOCKEVENTS_BROADCAST
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
 	select MIPS_MT
@@ -1451,32 +1451,17 @@
 	  Includes a loader for loading an elf relocatable object
 	  onto another VPE and running it.
 
-config MIPS_MT_SMTC_INSTANT_REPLAY
-	bool "Low-latency Dispatch of Deferred SMTC IPIs"
-	depends on MIPS_MT_SMTC && !PREEMPT
-	default y
-	help
-	  SMTC pseudo-interrupts between TCs are deferred and queued
-	  if the target TC is interrupt-inhibited (IXMT). In the first
-	  SMTC prototypes, these queued IPIs were serviced on return
-	  to user mode, or on entry into the kernel idle loop. The
-	  INSTANT_REPLAY option dispatches them as part of local_irq_restore()
-	  processing, which adds runtime overhead (hence the option to turn
-	  it off), but ensures that IPIs are handled promptly even under
-	  heavy I/O interrupt load.
-
 config MIPS_MT_SMTC_IM_BACKSTOP
 	bool "Use per-TC register bits as backstop for inhibited IM bits"
 	depends on MIPS_MT_SMTC
-	default y
+	default n
 	help
 	  To support multiple TC microthreads acting as "CPUs" within
 	  a VPE, VPE-wide interrupt mask bits must be specially manipulated
 	  during interrupt handling. To support legacy drivers and interrupt
 	  controller management code, SMTC has a "backstop" to track and
 	  if necessary restore the interrupt mask. This has some performance
-	  impact on interrupt service overhead. Disable it only if you know
-	  what you are doing.
+	  impact on interrupt service overhead.
 
 config MIPS_MT_SMTC_IRQAFF
 	bool "Support IRQ affinity API"
@@ -1486,10 +1471,8 @@
 	  Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.)
 	  for SMTC Linux kernel. Requires platform support, of which
 	  an example can be found in the MIPS kernel i8259 and Malta
-	  platform code.  It is recommended that MIPS_MT_SMTC_INSTANT_REPLAY
-	  be enabled if MIPS_MT_SMTC_IRQAFF is used. Adds overhead to
-	  interrupt dispatch, and should be used only if you know what
-	  you are doing.
+	  platform code.  Adds some overhead to interrupt dispatch, and
+	  should be used only if you know what you are doing.
 
 config MIPS_VPE_LOADER_TOM
 	bool "Load VPE program into memory hidden from linux"
@@ -1517,6 +1500,18 @@
 	  "exit" syscall notifying other kernel modules the SP program is
 	  exiting.  You probably want to say yes here.
 
+config MIPS_CMP
+	bool "MIPS CMP framework support"
+	depends on SYS_SUPPORTS_MIPS_CMP
+	select SYNC_R4K if BROKEN
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_SCHED_SMT if SMP
+	select WEAK_ORDERING
+	default n
+	help
+	  This is a placeholder option for the GCMP work. It will need to
+	  be handled differently...
+
 config SB1_PASS_1_WORKAROUNDS
 	bool
 	depends on CPU_SB1_PASS_1
@@ -1693,6 +1688,9 @@
 config SMP_UP
 	bool
 
+config SYS_SUPPORTS_MIPS_CMP
+	bool
+
 config SYS_SUPPORTS_SMP
 	bool
 
@@ -1740,17 +1738,6 @@
 	  performance should round up your number of processors to the next
 	  power of two.
 
-config MIPS_CMP
-	bool "MIPS CMP framework support"
-	depends on SMP
-	select SYNC_R4K
-	select SYS_SUPPORTS_SCHED_SMT
-	select WEAK_ORDERING
-	default n
-	help
-	  This is a placeholder option for the GCMP work. It will need to
-	  be handled differently...
-
 source "kernel/time/Kconfig"
 
 #

diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 706f939..25775cb 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile

@@ -10,6 +10,7 @@
 
 obj-$(CONFIG_CEVT_BCM1480)	+= cevt-bcm1480.o
 obj-$(CONFIG_CEVT_R4K)		+= cevt-r4k.o
+obj-$(CONFIG_MIPS_MT_SMTC)	+= cevt-smtc.o
 obj-$(CONFIG_CEVT_DS1287)	+= cevt-ds1287.o
 obj-$(CONFIG_CEVT_GT641XX)	+= cevt-gt641xx.o
 obj-$(CONFIG_CEVT_SB1250)	+= cevt-sb1250.o

diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 24a2d90..4a4c59f 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c

@@ -12,6 +12,14 @@
 
 #include <asm/smtc_ipi.h>
 #include <asm/time.h>
+#include <asm/cevt-r4k.h>
+
+/*
+ * The SMTC Kernel for the 34K, 1004K, et al. replaces several
+ * of these routines with SMTC-specific variants.
+ */
+
+#ifndef CONFIG_MIPS_MT_SMTC
 
 static int mips_next_event(unsigned long delta,
                            struct clock_event_device *evt)
@@ -19,60 +27,27 @@
 	unsigned int cnt;
 	int res;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	{
-	unsigned long flags, vpflags;
-	local_irq_save(flags);
-	vpflags = dvpe();
-#endif
 	cnt = read_c0_count();
 	cnt += delta;
 	write_c0_compare(cnt);
 	res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0;
-#ifdef CONFIG_MIPS_MT_SMTC
-	evpe(vpflags);
-	local_irq_restore(flags);
-	}
-#endif
 	return res;
 }
 
-static void mips_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
+#endif /* CONFIG_MIPS_MT_SMTC */
+
+void mips_set_clock_mode(enum clock_event_mode mode,
+				struct clock_event_device *evt)
 {
 	/* Nothing to do ...  */
 }
 
-static DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
-static int cp0_timer_irq_installed;
+DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device);
+int cp0_timer_irq_installed;
 
-/*
- * Timer ack for an R4k-compatible timer of a known frequency.
- */
-static void c0_timer_ack(void)
-{
-	write_c0_compare(read_c0_compare());
-}
+#ifndef CONFIG_MIPS_MT_SMTC
 
-/*
- * Possibly handle a performance counter interrupt.
- * Return true if the timer interrupt should not be checked
- */
-static inline int handle_perf_irq(int r2)
-{
-	/*
-	 * The performance counter overflow interrupt may be shared with the
-	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
-	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
-	 * and we can't reliably determine if a counter interrupt has also
-	 * happened (!r2) then don't check for a timer interrupt.
-	 */
-	return (cp0_perfcount_irq < 0) &&
-		perf_irq() == IRQ_HANDLED &&
-		!r2;
-}
-
-static irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
+irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
 {
 	const int r2 = cpu_has_mips_r2;
 	struct clock_event_device *cd;
@@ -93,12 +68,8 @@
 	 * interrupt.  Being the paranoiacs we are we check anyway.
 	 */
 	if (!r2 || (read_c0_cause() & (1 << 30))) {
-		c0_timer_ack();
-#ifdef CONFIG_MIPS_MT_SMTC
-		if (cpu_data[cpu].vpe_id)
-			goto out;
-		cpu = 0;
-#endif
+		/* Clear Count/Compare Interrupt */
+		write_c0_compare(read_c0_compare());
 		cd = &per_cpu(mips_clockevent_device, cpu);
 		cd->event_handler(cd);
 	}
@@ -107,65 +78,16 @@
 	return IRQ_HANDLED;
 }
 
-static struct irqaction c0_compare_irqaction = {
+#endif /* Not CONFIG_MIPS_MT_SMTC */
+
+struct irqaction c0_compare_irqaction = {
 	.handler = c0_compare_interrupt,
-#ifdef CONFIG_MIPS_MT_SMTC
-	.flags = IRQF_DISABLED,
-#else
 	.flags = IRQF_DISABLED | IRQF_PERCPU,
-#endif
 	.name = "timer",
 };
 
-#ifdef CONFIG_MIPS_MT_SMTC
-DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
 
-static void smtc_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
-{
-}
-
-static void mips_broadcast(cpumask_t mask)
-{
-	unsigned int cpu;
-
-	for_each_cpu_mask(cpu, mask)
-		smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
-}
-
-static void setup_smtc_dummy_clockevent_device(void)
-{
-	//uint64_t mips_freq = mips_hpt_^frequency;
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *cd;
-
-	cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-
-	cd->name		= "SMTC";
-	cd->features		= CLOCK_EVT_FEAT_DUMMY;
-
-	/* Calculate the min / max delta */
-	cd->mult	= 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
-	cd->shift		= 0; //32;
-	cd->max_delta_ns	= 0; //clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= 0; //clockevent_delta2ns(0x30, cd);
-
-	cd->rating		= 200;
-	cd->irq			= 17; //-1;
-//	if (cpu)
-//		cd->cpumask	= CPU_MASK_ALL; // cpumask_of_cpu(cpu);
-//	else
-		cd->cpumask	= cpumask_of_cpu(cpu);
-
-	cd->set_mode		= smtc_set_mode;
-
-	cd->broadcast		= mips_broadcast;
-
-	clockevents_register_device(cd);
-}
-#endif
-
-static void mips_event_handler(struct clock_event_device *dev)
+void mips_event_handler(struct clock_event_device *dev)
 {
 }
 
@@ -177,7 +99,23 @@
 	return (read_c0_cause() >> cp0_compare_irq) & 0x100;
 }
 
-static int c0_compare_int_usable(void)
+/*
+ * Compare interrupt can be routed and latched outside the core,
+ * so a single execution hazard barrier may not be enough to give
+ * it time to clear as seen in the Cause register.  4 times the
+ * pipeline depth seems reasonably conservative, and empirically
+ * works better in configurations with high CPU/bus clock ratios.
+ */
+
+#define compare_change_hazard() \
+	do { \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+		irq_disable_hazard(); \
+	} while (0)
+
+int c0_compare_int_usable(void)
 {
 	unsigned int delta;
 	unsigned int cnt;
@@ -187,7 +125,7 @@
 	 */
 	if (c0_compare_int_pending()) {
 		write_c0_compare(read_c0_count());
-		irq_disable_hazard();
+		compare_change_hazard();
 		if (c0_compare_int_pending())
 			return 0;
 	}
@@ -196,7 +134,7 @@
 		cnt = read_c0_count();
 		cnt += delta;
 		write_c0_compare(cnt);
-		irq_disable_hazard();
+		compare_change_hazard();
 		if ((int)(read_c0_count() - cnt) < 0)
 		    break;
 		/* increase delta if the timer was already expired */
@@ -205,11 +143,12 @@
 	while ((int)(read_c0_count() - cnt) <= 0)
 		;	/* Wait for expiry  */
 
+	compare_change_hazard();
 	if (!c0_compare_int_pending())
 		return 0;
 
 	write_c0_compare(read_c0_count());
-	irq_disable_hazard();
+	compare_change_hazard();
 	if (c0_compare_int_pending())
 		return 0;
 
@@ -219,6 +158,8 @@
 	return 1;
 }
 
+#ifndef CONFIG_MIPS_MT_SMTC
+
 int __cpuinit mips_clockevent_init(void)
 {
 	uint64_t mips_freq = mips_hpt_frequency;
@@ -229,17 +170,6 @@
 	if (!cpu_has_counter || !mips_hpt_frequency)
 		return -ENXIO;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-	setup_smtc_dummy_clockevent_device();
-
-	/*
-	 * On SMTC we only register VPE0's compare interrupt as clockevent
-	 * device.
-	 */
-	if (cpu)
-		return 0;
-#endif
-
 	if (!c0_compare_int_usable())
 		return -ENXIO;
 
@@ -265,13 +195,9 @@
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-#ifdef CONFIG_MIPS_MT_SMTC
-	cd->cpumask		= CPU_MASK_ALL;
-#else
 	cd->cpumask		= cpumask_of_cpu(cpu);
-#endif
 	cd->set_next_event	= mips_next_event;
-	cd->set_mode		= mips_set_mode;
+	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
 
 	clockevents_register_device(cd);
@@ -281,12 +207,9 @@
 
 	cp0_timer_irq_installed = 1;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-#define CPUCTR_IMASKBIT (0x100 << cp0_compare_irq)
-	setup_irq_smtc(irq, &c0_compare_irqaction, CPUCTR_IMASKBIT);
-#else
 	setup_irq(irq, &c0_compare_irqaction);
-#endif
 
 	return 0;
 }
+
+#endif /* Not CONFIG_MIPS_MT_SMTC */

diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
new file mode 100644
index 0000000..5162fe4
--- /dev/null
+++ b/arch/mips/kernel/cevt-smtc.c

@@ -0,0 +1,321 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 MIPS Technologies, Inc.
+ * Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org>
+ * Copyright (C) 2008 Kevin D. Kissell, Paralogos sarl
+ */
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+
+#include <asm/smtc_ipi.h>
+#include <asm/time.h>
+#include <asm/cevt-r4k.h>
+
+/*
+ * Variant clock event timer support for SMTC on MIPS 34K, 1004K
+ * or other MIPS MT cores.
+ *
+ * Notes on SMTC Support:
+ *
+ * SMTC has multiple microthread TCs pretending to be Linux CPUs.
+ * But there's only one Count/Compare pair per VPE, and Compare
+ * interrupts are taken opportunistically by available TCs
+ * bound to the VPE with the Count register.  The new timer
+ * framework provides for global broadcasts, but we really
+ * want VPE-level multicasts for best behavior. So instead
+ * of invoking the high-level clock-event broadcast code,
+ * this version of SMTC support uses the historical SMTC
+ * multicast mechanisms "under the hood", appearing to the
+ * generic clock layer as if the interrupts are per-CPU.
+ *
+ * The approach taken here is to maintain a set of NR_CPUS
+ * virtual timers, and track which "CPU" needs to be alerted
+ * at each event.
+ *
+ * It's unlikely that we'll see a MIPS MT core with more than
+ * 2 VPEs, but we *know* that we won't need to handle more
+ * VPEs than we have "CPUs".  So NCPUs arrays of NCPUs elements
+ * is always going to be overkill, but always going to be enough.
+ */
+
+unsigned long smtc_nexttime[NR_CPUS][NR_CPUS];
+static int smtc_nextinvpe[NR_CPUS];
+
+/*
+ * Timestamps stored are absolute Count values to be programmed
+ * into the Compare register.  Valid timestamps will never be zero.
+ * If a Zero Count value is actually calculated, it is converted
+ * to be a 1, which will introduce 1 or two CPU cycles of error
+ * roughly once every four billion events, which at 1000 HZ means
+ * about once every 50 days.  If that's actually a problem, one
+ * could alternate squashing 0 to 1 and to -1.
+ */
+
+#define MAKEVALID(x) (((x) == 0L) ? 1L : (x))
+#define ISVALID(x) ((x) != 0L)
+
+/*
+ * Time comparison is subtle, as it's really truncated
+ * modular arithmetic.
+ */
+
+#define IS_SOONER(a, b, reference) \
+    (((a) - (unsigned long)(reference)) < ((b) - (unsigned long)(reference)))
+
+/*
+ * CATCHUP_INCREMENT, used when the function falls behind the counter.
+ * Could be an increasing function instead of a constant.
+ */
+
+#define CATCHUP_INCREMENT 64
+
+static int mips_next_event(unsigned long delta,
+				struct clock_event_device *evt)
+{
+	unsigned long flags;
+	unsigned int mtflags;
+	unsigned long timestamp, reference, previous;
+	unsigned long nextcomp = 0L;
+	int vpe = current_cpu_data.vpe_id;
+	int cpu = smp_processor_id();
+	local_irq_save(flags);
+	mtflags = dmt();
+
+	/*
+	 * Maintain the per-TC virtual timer
+	 * and program the per-VPE shared Count register
+	 * as appropriate here...
+	 */
+	reference = (unsigned long)read_c0_count();
+	timestamp = MAKEVALID(reference + delta);
+	/*
+	 * To really model the clock, we have to catch the case
+	 * where the current next-in-VPE timestamp is the old
+	 * timestamp for the calling CPU, but the new value is
+	 * in fact later.  In that case, we have to do a full
+	 * scan and discover the new next-in-VPE CPU id and
+	 * timestamp.
+	 */
+	previous = smtc_nexttime[vpe][cpu];
+	if (cpu == smtc_nextinvpe[vpe] && ISVALID(previous)
+	    && IS_SOONER(previous, timestamp, reference)) {
+		int i;
+		int soonest = cpu;
+
+		/*
+		 * Update timestamp array here, so that new
+		 * value gets considered along with those of
+		 * other virtual CPUs on the VPE.
+		 */
+		smtc_nexttime[vpe][cpu] = timestamp;
+		for_each_online_cpu(i) {
+			if (ISVALID(smtc_nexttime[vpe][i])
+			    && IS_SOONER(smtc_nexttime[vpe][i],
+				smtc_nexttime[vpe][soonest], reference)) {
+				    soonest = i;
+			}
+		}
+		smtc_nextinvpe[vpe] = soonest;
+		nextcomp = smtc_nexttime[vpe][soonest];
+	/*
+	 * Otherwise, we don't have to process the whole array rank,
+	 * we just have to see if the event horizon has gotten closer.
+	 */
+	} else {
+		if (!ISVALID(smtc_nexttime[vpe][smtc_nextinvpe[vpe]]) ||
+		    IS_SOONER(timestamp,
+			smtc_nexttime[vpe][smtc_nextinvpe[vpe]], reference)) {
+			    smtc_nextinvpe[vpe] = cpu;
+			    nextcomp = timestamp;
+		}
+		/*
+		 * Since next-in-VPE may be the same as the executing
+		 * virtual CPU, we update the array *after* checking
+		 * its value.
+		 */
+		smtc_nexttime[vpe][cpu] = timestamp;
+	}
+
+	/*
+	 * It may be that, in fact, we don't need to update Compare,
+	 * but if we do, we want to make sure we didn't fall into
+	 * a crack just behind Count.
+	 */
+	if (ISVALID(nextcomp)) {
+		write_c0_compare(nextcomp);
+		ehb();
+		/*
+		 * We never return an error, we just make sure
+		 * that we trigger the handlers as quickly as
+		 * we can if we fell behind.
+		 */
+		while ((nextcomp - (unsigned long)read_c0_count())
+			> (unsigned long)LONG_MAX) {
+			nextcomp += CATCHUP_INCREMENT;
+			write_c0_compare(nextcomp);
+			ehb();
+		}
+	}
+	emt(mtflags);
+	local_irq_restore(flags);
+	return 0;
+}
+
+
+void smtc_distribute_timer(int vpe)
+{
+	unsigned long flags;
+	unsigned int mtflags;
+	int cpu;
+	struct clock_event_device *cd;
+	unsigned long nextstamp = 0L;
+	unsigned long reference;
+
+
+repeat:
+	for_each_online_cpu(cpu) {
+	    /*
+	     * Find virtual CPUs within the current VPE who have
+	     * unserviced timer requests whose time is now past.
+	     */
+	    local_irq_save(flags);
+	    mtflags = dmt();
+	    if (cpu_data[cpu].vpe_id == vpe &&
+		ISVALID(smtc_nexttime[vpe][cpu])) {
+		reference = (unsigned long)read_c0_count();
+		if ((smtc_nexttime[vpe][cpu] - reference)
+			 > (unsigned long)LONG_MAX) {
+			    smtc_nexttime[vpe][cpu] = 0L;
+			    emt(mtflags);
+			    local_irq_restore(flags);
+			    /*
+			     * We don't send IPIs to ourself.
+			     */
+			    if (cpu != smp_processor_id()) {
+				smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
+			    } else {
+				cd = &per_cpu(mips_clockevent_device, cpu);
+				cd->event_handler(cd);
+			    }
+		} else {
+			/* Local to VPE but Valid Time not yet reached. */
+			if (!ISVALID(nextstamp) ||
+			    IS_SOONER(smtc_nexttime[vpe][cpu], nextstamp,
+			    reference)) {
+				smtc_nextinvpe[vpe] = cpu;
+				nextstamp = smtc_nexttime[vpe][cpu];
+			}
+			emt(mtflags);
+			local_irq_restore(flags);
+		}
+	    } else {
+		emt(mtflags);
+		local_irq_restore(flags);
+
+	    }
+	}
+	/* Reprogram for interrupt at next soonest timestamp for VPE */
+	if (ISVALID(nextstamp)) {
+		write_c0_compare(nextstamp);
+		ehb();
+		if ((nextstamp - (unsigned long)read_c0_count())
+			> (unsigned long)LONG_MAX)
+				goto repeat;
+	}
+}
+
+
+irqreturn_t c0_compare_interrupt(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	/* If we're running SMTC, we've got MIPS MT and therefore MIPS32R2 */
+	handle_perf_irq(1);
+
+	if (read_c0_cause() & (1 << 30)) {
+		/* Clear Count/Compare Interrupt */
+		write_c0_compare(read_c0_compare());
+		smtc_distribute_timer(cpu_data[cpu].vpe_id);
+	}
+	return IRQ_HANDLED;
+}
+
+
+int __cpuinit mips_clockevent_init(void)
+{
+	uint64_t mips_freq = mips_hpt_frequency;
+	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *cd;
+	unsigned int irq;
+	int i;
+	int j;
+
+	if (!cpu_has_counter || !mips_hpt_frequency)
+		return -ENXIO;
+	if (cpu == 0) {
+		for (i = 0; i < num_possible_cpus(); i++) {
+			smtc_nextinvpe[i] = 0;
+			for (j = 0; j < num_possible_cpus(); j++)
+				smtc_nexttime[i][j] = 0L;
+		}
+		/*
+		 * SMTC also can't have the usability test
+		 * run by secondary TCs once Compare is in use.
+		 */
+		if (!c0_compare_int_usable())
+			return -ENXIO;
+	}
+
+	/*
+	 * With vectored interrupts things are getting platform specific.
+	 * get_c0_compare_int is a hook to allow a platform to return the
+	 * interrupt number of its liking.
+	 */
+	irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
+	if (get_c0_compare_int)
+		irq = get_c0_compare_int();
+
+	cd = &per_cpu(mips_clockevent_device, cpu);
+
+	cd->name		= "MIPS";
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+
+	/* Calculate the min / max delta */
+	cd->mult	= div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
+	cd->shift		= 32;
+	cd->max_delta_ns	= clockevent_delta2ns(0x7fffffff, cd);
+	cd->min_delta_ns	= clockevent_delta2ns(0x300, cd);
+
+	cd->rating		= 300;
+	cd->irq			= irq;
+	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->set_next_event	= mips_next_event;
+	cd->set_mode		= mips_set_clock_mode;
+	cd->event_handler	= mips_event_handler;
+
+	clockevents_register_device(cd);
+
+	/*
+	 * On SMTC we only want to do the data structure
+	 * initialization and IRQ setup once.
+	 */
+	if (cpu)
+		return 0;
+	/*
+	 * And we need the hwmask associated with the c0_compare
+	 * vector to be initialized.
+	 */
+	irq_hwmask[irq] = (0x100 << cp0_compare_irq);
+	if (cp0_timer_irq_installed)
+		return 0;
+
+	cp0_timer_irq_installed = 1;
+
+	setup_irq(irq, &c0_compare_irqaction);
+
+	return 0;
+}

diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 11c92dc..e621fda 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c

@@ -54,14 +54,18 @@
  * interrupt is requested" restriction in the MIPS32/MIPS64 architecture makes
  * using this version a gamble.
  */
-static void r4k_wait_irqoff(void)
+void r4k_wait_irqoff(void)
 {
 	local_irq_disable();
 	if (!need_resched())
-		__asm__("	.set	mips3		\n"
+		__asm__("	.set	push		\n"
+			"	.set	mips3		\n"
 			"	wait			\n"
-			"	.set	mips0		\n");
+			"	.set	pop		\n");
 	local_irq_enable();
+	__asm__(" 	.globl __pastwait	\n"
+		"__pastwait:			\n");
+	return;
 }
 
 /*

diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index e29598a..ffa3310 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S

@@ -79,11 +79,6 @@
 
 FEXPORT(restore_all)			# restore full frame
 #ifdef CONFIG_MIPS_MT_SMTC
-/* Detect and execute deferred IPI "interrupts" */
-	LONG_L	s0, TI_REGS($28)
-	LONG_S	sp, TI_REGS($28)
-	jal	deferred_smtc_ipi
-	LONG_S	s0, TI_REGS($28)
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 /* Re-arm any temporarily masked interrupts not explicitly "acked" */
 	mfc0	v0, CP0_TCSTATUS
@@ -112,6 +107,11 @@
 	xor	t0, t0, t3
 	mtc0	t0, CP0_TCCONTEXT
 #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
+/* Detect and execute deferred IPI "interrupts" */
+	LONG_L	s0, TI_REGS($28)
+	LONG_S	sp, TI_REGS($28)
+	jal	deferred_smtc_ipi
+	LONG_S	s0, TI_REGS($28)
 #endif /* CONFIG_MIPS_MT_SMTC */
 	.set	noat
 	RESTORE_TEMP

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index f886dd7..01dcbe3 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S

@@ -282,8 +282,8 @@
 	and	t0, a0, t1
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 	mfc0	t2, CP0_TCCONTEXT
-	or	t0, t0, t2
-	mtc0	t0, CP0_TCCONTEXT
+	or	t2, t0, t2
+	mtc0	t2, CP0_TCCONTEXT
 #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */
 	xor	t1, t1, t0
 	mtc0	t1, CP0_STATUS

diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 3613645..492a0a8 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S

@@ -22,6 +22,7 @@
 #include <asm/irqflags.h>
 #include <asm/regdef.h>
 #include <asm/page.h>
+#include <asm/pgtable-bits.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
 

diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index df4d3f2..dc9eb72 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c

@@ -159,7 +159,7 @@
 /*
  * FPU Use Factor empirically derived from experiments on 34K
  */
-#define FPUSEFACTOR 333
+#define FPUSEFACTOR 2000
 
 static __init int mt_fp_affinity_init(void)
 {

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index ce76843..22fc19b 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c

@@ -55,7 +55,7 @@
 	while (1) {
 		tick_nohz_stop_sched_tick(1);
 		while (!need_resched()) {
-#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG
+#ifdef CONFIG_MIPS_MT_SMTC
 			extern void smtc_idle_loop_hook(void);
 
 			smtc_idle_loop_hook();
@@ -145,19 +145,18 @@
 	 */
 	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
 	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
+
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC restores TCStatus after Status, and the CU bits
+	 * are aliased there.
+	 */
+	childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1);
+#endif
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 
 #ifdef CONFIG_MIPS_MT_FPAFF
 	clear_tsk_thread_flag(p, TIF_FPUBOUND);
-
-	/*
-	 * FPU affinity support is cleaner if we track the
-	 * user-visible CPU affinity from the very beginning.
-	 * The generic cpus_allowed mask will already have
-	 * been copied from the parent before copy_thread
-	 * is invoked.
-	 */
-	p->thread.user_cpus_allowed = p->cpus_allowed;
 #endif /* CONFIG_MIPS_MT_FPAFF */
 
 	if (clone_flags & CLONE_SETTLS)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 35234b9..96ffc9c 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c

@@ -238,7 +238,7 @@
 		case FPC_EIR: {	/* implementation / version register */
 			unsigned int flags;
 #ifdef CONFIG_MIPS_MT_SMTC
-			unsigned int irqflags;
+			unsigned long irqflags;
 			unsigned int mtflags;
 #endif /* CONFIG_MIPS_MT_SMTC */
 

diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index a516286..897fb2b 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c

@@ -1,4 +1,21 @@
-/* Copyright (C) 2004 Mips Technologies, Inc */
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) 2004 Mips Technologies, Inc
+ * Copyright (C) 2008 Kevin D. Kissell
+ */
 
 #include <linux/clockchips.h>
 #include <linux/kernel.h>
@@ -21,7 +38,6 @@
 #include <asm/time.h>
 #include <asm/addrspace.h>
 #include <asm/smtc.h>
-#include <asm/smtc_ipi.h>
 #include <asm/smtc_proc.h>
 
 /*
@@ -58,11 +74,6 @@
 
 asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS];
 
-/*
- * Clock interrupt "latch" buffers, per "CPU"
- */
-
-static atomic_t ipi_timer_latch[NR_CPUS];
 
 /*
  * Number of InterProcessor Interrupt (IPI) message buffers to allocate
@@ -70,7 +81,7 @@
 
 #define IPIBUF_PER_CPU 4
 
-static struct smtc_ipi_q IPIQ[NR_CPUS];
+struct smtc_ipi_q IPIQ[NR_CPUS];
 static struct smtc_ipi_q freeIPIq;
 
 
@@ -282,7 +293,7 @@
  * phys_cpu_present_map and the logical/physical mappings.
  */
 
-int __init mipsmt_build_cpu_map(int start_cpu_slot)
+int __init smtc_build_cpu_map(int start_cpu_slot)
 {
 	int i, ntcs;
 
@@ -325,7 +336,12 @@
 	write_tc_c0_tcstatus((read_tc_c0_tcstatus()
 			& ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT))
 			| TCSTATUS_A);
-	write_tc_c0_tccontext(0);
+	/*
+	 * TCContext gets an offset from the base of the IPIQ array
+	 * to be used in low-level code to detect the presence of
+	 * an active IPI queue
+	 */
+	write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16);
 	/* Bind tc to vpe */
 	write_tc_c0_tcbind(vpe);
 	/* In general, all TCs should have the same cpu_data indications */
@@ -336,10 +352,18 @@
 		cpu_data[cpu].options &= ~MIPS_CPU_FPU;
 	cpu_data[cpu].vpe_id = vpe;
 	cpu_data[cpu].tc_id = tc;
+	/* Multi-core SMTC hasn't been tested, but be prepared */
+	cpu_data[cpu].core = (read_vpe_c0_ebase() >> 1) & 0xff;
 }
 
+/*
+ * Tweak to get Count registers in as close a sync as possible.
+ * Value seems good for 34K-class cores.
+ */
 
-void mipsmt_prepare_cpus(void)
+#define CP0_SKEW 8
+
+void smtc_prepare_cpus(int cpus)
 {
 	int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu;
 	unsigned long flags;
@@ -363,13 +387,13 @@
 		IPIQ[i].head = IPIQ[i].tail = NULL;
 		spin_lock_init(&IPIQ[i].lock);
 		IPIQ[i].depth = 0;
-		atomic_set(&ipi_timer_latch[i], 0);
 	}
 
 	/* cpu_data index starts at zero */
 	cpu = 0;
 	cpu_data[cpu].vpe_id = 0;
 	cpu_data[cpu].tc_id = 0;
+	cpu_data[cpu].core = (read_c0_ebase() >> 1) & 0xff;
 	cpu++;
 
 	/* Report on boot-time options */
@@ -484,7 +508,8 @@
 			write_vpe_c0_compare(0);
 			/* Propagate Config7 */
 			write_vpe_c0_config7(read_c0_config7());
-			write_vpe_c0_count(read_c0_count());
+			write_vpe_c0_count(read_c0_count() + CP0_SKEW);
+			ehb();
 		}
 		/* enable multi-threading within VPE */
 		write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE);
@@ -556,7 +581,7 @@
 void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle)
 {
 	extern u32 kernelsp[NR_CPUS];
-	long flags;
+	unsigned long flags;
 	int mtflags;
 
 	LOCK_MT_PRA();
@@ -585,24 +610,22 @@
 
 void smtc_init_secondary(void)
 {
-	/*
-	 * Start timer on secondary VPEs if necessary.
-	 * plat_timer_setup has already have been invoked by init/main
-	 * on "boot" TC.  Like per_cpu_trap_init() hack, this assumes that
-	 * SMTC init code assigns TCs consdecutively and in ascending order
-	 * to across available VPEs.
-	 */
-	if (((read_c0_tcbind() & TCBIND_CURTC) != 0) &&
-	    ((read_c0_tcbind() & TCBIND_CURVPE)
-	    != cpu_data[smp_processor_id() - 1].vpe_id)){
-		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
-	}
-
 	local_irq_enable();
 }
 
 void smtc_smp_finish(void)
 {
+	int cpu = smp_processor_id();
+
+	/*
+	 * Lowest-numbered CPU per VPE starts a clock tick.
+	 * Like per_cpu_trap_init() hack, this assumes that
+	 * SMTC init code assigns TCs consecutively and
+	 * in ascending order across available VPEs.
+	 */
+	if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id))
+		write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
+
 	printk("TC %d going on-line as CPU %d\n",
 		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
 }
@@ -753,8 +776,10 @@
 {
 	int tcstatus;
 	struct smtc_ipi *pipi;
-	long flags;
+	unsigned long flags;
 	int mtflags;
+	unsigned long tcrestart;
+	extern void r4k_wait_irqoff(void), __pastwait(void);
 
 	if (cpu == smp_processor_id()) {
 		printk("Cannot Send IPI to self!\n");
@@ -771,8 +796,6 @@
 	pipi->arg = (void *)action;
 	pipi->dest = cpu;
 	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
-		if (type == SMTC_CLOCK_TICK)
-			atomic_inc(&ipi_timer_latch[cpu]);
 		/* If not on same VPE, enqueue and send cross-VPE interrupt */
 		smtc_ipi_nq(&IPIQ[cpu], pipi);
 		LOCK_CORE_PRA();
@@ -800,22 +823,29 @@
 
 		if ((tcstatus & TCSTATUS_IXMT) != 0) {
 			/*
-			 * Spin-waiting here can deadlock,
-			 * so we queue the message for the target TC.
+			 * If we're in the irq-off version of the wait
+			 * loop, we need to force exit from the wait and
+			 * do a direct post of the IPI.
+			 */
+			if (cpu_wait == r4k_wait_irqoff) {
+				tcrestart = read_tc_c0_tcrestart();
+				if (tcrestart >= (unsigned long)r4k_wait_irqoff
+				    && tcrestart < (unsigned long)__pastwait) {
+					write_tc_c0_tcrestart(__pastwait);
+					tcstatus &= ~TCSTATUS_IXMT;
+					write_tc_c0_tcstatus(tcstatus);
+					goto postdirect;
+				}
+			}
+			/*
+			 * Otherwise we queue the message for the target TC
+			 * to pick up when he does a local_irq_restore()
 			 */
 			write_tc_c0_tchalt(0);
 			UNLOCK_CORE_PRA();
-			/* Try to reduce redundant timer interrupt messages */
-			if (type == SMTC_CLOCK_TICK) {
-			    if (atomic_postincrement(&ipi_timer_latch[cpu])!=0){
-				smtc_ipi_nq(&freeIPIq, pipi);
-				return;
-			    }
-			}
 			smtc_ipi_nq(&IPIQ[cpu], pipi);
 		} else {
-			if (type == SMTC_CLOCK_TICK)
-				atomic_inc(&ipi_timer_latch[cpu]);
+postdirect:
 			post_direct_ipi(cpu, pipi);
 			write_tc_c0_tchalt(0);
 			UNLOCK_CORE_PRA();
@@ -883,7 +913,7 @@
 	smp_call_function_interrupt();
 }
 
-DECLARE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
+DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 
 void ipi_decode(struct smtc_ipi *pipi)
 {
@@ -891,20 +921,13 @@
 	struct clock_event_device *cd;
 	void *arg_copy = pipi->arg;
 	int type_copy = pipi->type;
-	int ticks;
-
 	smtc_ipi_nq(&freeIPIq, pipi);
 	switch (type_copy) {
 	case SMTC_CLOCK_TICK:
 		irq_enter();
 		kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++;
-		cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-		ticks = atomic_read(&ipi_timer_latch[cpu]);
-		atomic_sub(ticks, &ipi_timer_latch[cpu]);
-		while (ticks) {
-			cd->event_handler(cd);
-			ticks--;
-		}
+		cd = &per_cpu(mips_clockevent_device, cpu);
+		cd->event_handler(cd);
 		irq_exit();
 		break;
 
@@ -937,24 +960,48 @@
 	}
 }
 
+/*
+ * Similar to smtc_ipi_replay(), but invoked from context restore,
+ * so it reuses the current exception frame rather than set up a
+ * new one with self_ipi.
+ */
+
 void deferred_smtc_ipi(void)
 {
-	struct smtc_ipi *pipi;
-	unsigned long flags;
-/* DEBUG */
-	int q = smp_processor_id();
+	int cpu = smp_processor_id();
 
 	/*
 	 * Test is not atomic, but much faster than a dequeue,
 	 * and the vast majority of invocations will have a null queue.
+	 * If irq_disabled when this was called, then any IPIs queued
+	 * after we test last will be taken on the next irq_enable/restore.
+	 * If interrupts were enabled, then any IPIs added after the
+	 * last test will be taken directly.
 	 */
-	if (IPIQ[q].head != NULL) {
-		while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) {
-			/* ipi_decode() should be called with interrupts off */
-			local_irq_save(flags);
+
+	while (IPIQ[cpu].head != NULL) {
+		struct smtc_ipi_q *q = &IPIQ[cpu];
+		struct smtc_ipi *pipi;
+		unsigned long flags;
+
+		/*
+		 * It may be possible we'll come in with interrupts
+		 * already enabled.
+		 */
+		local_irq_save(flags);
+
+		spin_lock(&q->lock);
+		pipi = __smtc_ipi_dq(q);
+		spin_unlock(&q->lock);
+		if (pipi != NULL)
 			ipi_decode(pipi);
-			local_irq_restore(flags);
-		}
+		/*
+		 * The use of the __raw_local restore isn't
+		 * as obviously necessary here as in smtc_ipi_replay(),
+		 * but it's more efficient, given that we're already
+		 * running down the IPI queue.
+		 */
+		__raw_local_irq_restore(flags);
 	}
 }
 
@@ -975,7 +1022,7 @@
 	struct smtc_ipi *pipi;
 	unsigned long tcstatus;
 	int sent;
-	long flags;
+	unsigned long flags;
 	unsigned int mtflags;
 	unsigned int vpflags;
 
@@ -1066,55 +1113,53 @@
 
 /*
  * SMTC-specific hacks invoked from elsewhere in the kernel.
- *
- * smtc_ipi_replay is called from raw_local_irq_restore which is only ever
- * called with interrupts disabled.  We do rely on interrupts being disabled
- * here because using spin_lock_irqsave()/spin_unlock_irqrestore() would
- * result in a recursive call to raw_local_irq_restore().
  */
 
-static void __smtc_ipi_replay(void)
+ /*
+  * smtc_ipi_replay is called from raw_local_irq_restore
+  */
+
+void smtc_ipi_replay(void)
 {
 	unsigned int cpu = smp_processor_id();
 
 	/*
 	 * To the extent that we've ever turned interrupts off,
 	 * we may have accumulated deferred IPIs.  This is subtle.
-	 * If we use the smtc_ipi_qdepth() macro, we'll get an
-	 * exact number - but we'll also disable interrupts
-	 * and create a window of failure where a new IPI gets
-	 * queued after we test the depth but before we re-enable
-	 * interrupts. So long as IXMT never gets set, however,
 	 * we should be OK:  If we pick up something and dispatch
 	 * it here, that's great. If we see nothing, but concurrent
 	 * with this operation, another TC sends us an IPI, IXMT
 	 * is clear, and we'll handle it as a real pseudo-interrupt
-	 * and not a pseudo-pseudo interrupt.
+	 * and not a pseudo-pseudo interrupt.  The important thing
+	 * is to do the last check for queued message *after* the
+	 * re-enabling of interrupts.
 	 */
-	if (IPIQ[cpu].depth > 0) {
-		while (1) {
-			struct smtc_ipi_q *q = &IPIQ[cpu];
-			struct smtc_ipi *pipi;
-			extern void self_ipi(struct smtc_ipi *);
+	while (IPIQ[cpu].head != NULL) {
+		struct smtc_ipi_q *q = &IPIQ[cpu];
+		struct smtc_ipi *pipi;
+		unsigned long flags;
 
-			spin_lock(&q->lock);
-			pipi = __smtc_ipi_dq(q);
-			spin_unlock(&q->lock);
-			if (!pipi)
-				break;
+		/*
+		 * It's just possible we'll come in with interrupts
+		 * already enabled.
+		 */
+		local_irq_save(flags);
 
+		spin_lock(&q->lock);
+		pipi = __smtc_ipi_dq(q);
+		spin_unlock(&q->lock);
+		/*
+		 * But use a raw restore here to avoid recursion.
+		 */
+		__raw_local_irq_restore(flags);
+
+		if (pipi) {
 			self_ipi(pipi);
 			smtc_cpu_stats[cpu].selfipis++;
 		}
 	}
 }
 
-void smtc_ipi_replay(void)
-{
-	raw_local_irq_disable();
-	__smtc_ipi_replay();
-}
-
 EXPORT_SYMBOL(smtc_ipi_replay);
 
 void smtc_idle_loop_hook(void)
@@ -1193,40 +1238,13 @@
 		}
 	}
 
-	/*
-	 * Now that we limit outstanding timer IPIs, check for hung TC
-	 */
-	for (tc = 0; tc < NR_CPUS; tc++) {
-		/* Don't check ourself - we'll dequeue IPIs just below */
-		if ((tc != smp_processor_id()) &&
-		    atomic_read(&ipi_timer_latch[tc]) > timerq_limit) {
-		    if (clock_hang_reported[tc] == 0) {
-			pdb_msg += sprintf(pdb_msg,
-				"TC %d looks hung with timer latch at %d\n",
-				tc, atomic_read(&ipi_timer_latch[tc]));
-			clock_hang_reported[tc]++;
-			}
-		}
-	}
 	emt(mtflags);
 	local_irq_restore(flags);
 	if (pdb_msg != &id_ho_db_msg[0])
 		printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg);
 #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
 
-	/*
-	 * Replay any accumulated deferred IPIs. If "Instant Replay"
-	 * is in use, there should never be any.
-	 */
-#ifndef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY
-	{
-		unsigned long flags;
-
-		local_irq_save(flags);
-		__smtc_ipi_replay();
-		local_irq_restore(flags);
-	}
-#endif /* CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY */
+	smtc_ipi_replay();
 }
 
 void smtc_soft_dump(void)
@@ -1242,10 +1260,6 @@
 		printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
 	}
 	smtc_ipi_qdump();
-	printk("Timer IPI Backlogs:\n");
-	for (i=0; i < NR_CPUS; i++) {
-		printk("%d: %d\n", i, atomic_read(&ipi_timer_latch[i]));
-	}
 	printk("%d Recoveries of \"stolen\" FPU\n",
 	       atomic_read(&smtc_fpu_recoveries));
 }

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 5fd0cd0..b602ac6 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c

@@ -825,8 +825,10 @@
 		if (cpus_intersects(current->cpus_allowed, mt_fpu_cpumask)) {
 			cpumask_t tmask;
 
-			cpus_and(tmask, current->thread.user_cpus_allowed,
-			         mt_fpu_cpumask);
+			current->thread.user_cpus_allowed
+				= current->cpus_allowed;
+			cpus_and(tmask, current->cpus_allowed,
+				mt_fpu_cpumask);
 			set_cpus_allowed(current, tmask);
 			set_thread_flag(TIF_FPUBOUND);
 		}

diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile
index 3b7dd72..cef2db8 100644
--- a/arch/mips/mti-malta/Makefile
+++ b/arch/mips/mti-malta/Makefile

@@ -15,6 +15,6 @@
 obj-$(CONFIG_PCI)		+= malta-pci.o
 
 # FIXME FIXME FIXME
-obj-$(CONFIG_MIPS_MT_SMTC)	+= malta_smtc.o
+obj-$(CONFIG_MIPS_MT_SMTC)	+= malta-smtc.o
 
 EXTRA_CFLAGS += -Werror

diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 5ea705e..f84a46a 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c

@@ -84,12 +84,17 @@
 
 static void __init msmtc_smp_setup(void)
 {
-	mipsmt_build_cpu_map(0);
+	/*
+	 * we won't get the definitive value until
+	 * we've run smtc_prepare_cpus later, but
+	 * we would appear to need an upper bound now.
+	 */
+	smp_num_siblings = smtc_build_cpu_map(0);
 }
 
 static void __init msmtc_prepare_cpus(unsigned int max_cpus)
 {
-	mipsmt_prepare_cpus();
+	smtc_prepare_cpus(max_cpus);
 }
 
 struct plat_smp_ops msmtc_smp_ops = {

diff --git a/arch/mips/sibyte/swarm/Makefile b/arch/mips/sibyte/swarm/Makefile
index f18ba92..7b45f19 100644
--- a/arch/mips/sibyte/swarm/Makefile
+++ b/arch/mips/sibyte/swarm/Makefile

@@ -1,3 +1,4 @@
-obj-y				:= setup.o rtc_xicor1241.o rtc_m41t81.o
+obj-y				:= platform.o setup.o rtc_xicor1241.o \
+				   rtc_m41t81.o
 
 obj-$(CONFIG_I2C_BOARDINFO)	+= swarm-i2c.o

diff --git a/arch/mips/sibyte/swarm/platform.c b/arch/mips/sibyte/swarm/platform.c
new file mode 100644
index 0000000..54847fe
--- /dev/null
+++ b/arch/mips/sibyte/swarm/platform.c

@@ -0,0 +1,85 @@
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/ata_platform.h>
+
+#include <asm/sibyte/board.h>
+#include <asm/sibyte/sb1250_genbus.h>
+#include <asm/sibyte/sb1250_regs.h>
+
+#if defined(CONFIG_SIBYTE_SWARM) || defined(CONFIG_SIBYTE_LITTLESUR)
+
+#define DRV_NAME	"pata-swarm"
+
+#define SWARM_IDE_SHIFT	5
+#define SWARM_IDE_BASE	0x1f0
+#define SWARM_IDE_CTRL	0x3f6
+
+static struct resource swarm_pata_resource[] = {
+	{
+		.name	= "Swarm GenBus IDE",
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.name	= "Swarm GenBus IDE",
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.name	= "Swarm GenBus IDE",
+		.flags	= IORESOURCE_IRQ,
+		.start	= K_INT_GB_IDE,
+		.end	= K_INT_GB_IDE,
+	},
+};
+
+static struct pata_platform_info pata_platform_data = {
+	.ioport_shift	= SWARM_IDE_SHIFT,
+};
+
+static struct platform_device swarm_pata_device = {
+	.name		= "pata_platform",
+	.id		= -1,
+	.resource	= swarm_pata_resource,
+	.num_resources	= ARRAY_SIZE(swarm_pata_resource),
+	.dev  = {
+		.platform_data		= &pata_platform_data,
+		.coherent_dma_mask	= ~0,	/* grumble */
+	},
+};
+
+static int __init swarm_pata_init(void)
+{
+	u8 __iomem *base;
+	phys_t offset, size;
+	struct resource *r;
+
+	if (!SIBYTE_HAVE_IDE)
+		return -ENODEV;
+
+	base = ioremap(A_IO_EXT_BASE, 0x800);
+	offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS));
+	size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS));
+	iounmap(base);
+
+	offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE;
+	size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE;
+	if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) {
+		pr_info(DRV_NAME ": PATA interface at GenBus disabled\n");
+
+		return -EBUSY;
+	}
+
+	pr_info(DRV_NAME ": PATA interface at GenBus slot %i\n", IDE_CS);
+
+	r = swarm_pata_resource;
+	r[0].start = offset + (SWARM_IDE_BASE << SWARM_IDE_SHIFT);
+	r[0].end   = offset + ((SWARM_IDE_BASE + 8) << SWARM_IDE_SHIFT) - 1;
+	r[1].start = offset + (SWARM_IDE_CTRL << SWARM_IDE_SHIFT);
+	r[1].end   = offset + ((SWARM_IDE_CTRL + 1) << SWARM_IDE_SHIFT) - 1;
+
+	return platform_device_register(&swarm_pata_device);
+}
+
+device_initcall(swarm_pata_init);
+
+#endif /* defined(CONFIG_SIBYTE_SWARM) || defined(CONFIG_SIBYTE_LITTLESUR) */

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index ca114fe..06acb1a 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c

@@ -169,6 +169,8 @@
 
 static void clock_comparator_interrupt(__u16 code)
 {
+	if (S390_lowcore.clock_comparator == -1ULL)
+		set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
 static void etr_timing_alert(struct etr_irq_parm *);

diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index fc6ab60..0953cee 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c

@@ -1,14 +1,9 @@
 /*
- *  arch/s390/lib/delay.c
  *    Precise Delay Loops for S390
  *
- *  S390 version
- *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *
- *  Derived from "arch/i386/lib/delay.c"
- *    Copyright (C) 1993 Linus Torvalds
- *    Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *    Copyright IBM Corp. 1999,2008
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>
  */
 
 #include <linux/sched.h>
@@ -29,30 +24,31 @@
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 
-/*
- * Waits for 'usecs' microseconds using the TOD clock comparator.
- */
-void __udelay(unsigned long usecs)
+static void __udelay_disabled(unsigned long usecs)
 {
-	u64 end, time, old_cc = 0;
-	unsigned long flags, cr0, mask, dummy;
-	int irq_context;
+	unsigned long mask, cr0, cr0_saved;
+	u64 clock_saved;
 
-	irq_context = in_interrupt();
-	if (!irq_context)
-		local_bh_disable();
-	local_irq_save(flags);
-	if (raw_irqs_disabled_flags(flags)) {
-		old_cc = local_tick_disable();
-		S390_lowcore.clock_comparator = -1ULL;
-		__ctl_store(cr0, 0, 0);
-		dummy = (cr0 & 0xffff00e0) | 0x00000800;
-		__ctl_load(dummy , 0, 0);
-		mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
-	} else
-		mask = psw_kernel_bits | PSW_MASK_WAIT |
-			PSW_MASK_EXT | PSW_MASK_IO;
+	clock_saved = local_tick_disable();
+	set_clock_comparator(get_clock() + ((u64) usecs << 12));
+	__ctl_store(cr0_saved, 0, 0);
+	cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
+	__ctl_load(cr0 , 0, 0);
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
+	trace_hardirqs_on();
+	__load_psw_mask(mask);
+	local_irq_disable();
+	__ctl_load(cr0_saved, 0, 0);
+	local_tick_enable(clock_saved);
+	set_clock_comparator(S390_lowcore.clock_comparator);
+}
 
+static void __udelay_enabled(unsigned long usecs)
+{
+	unsigned long mask;
+	u64 end, time;
+
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO;
 	end = get_clock() + ((u64) usecs << 12);
 	do {
 		time = end < S390_lowcore.clock_comparator ?
@@ -62,13 +58,37 @@
 		__load_psw_mask(mask);
 		local_irq_disable();
 	} while (get_clock() < end);
-
-	if (raw_irqs_disabled_flags(flags)) {
-		__ctl_load(cr0, 0, 0);
-		local_tick_enable(old_cc);
-	}
-	if (!irq_context)
-		_local_bh_enable();
 	set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+/*
+ * Waits for 'usecs' microseconds using the TOD clock comparator.
+ */
+void __udelay(unsigned long usecs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	if (in_irq()) {
+		__udelay_disabled(usecs);
+		goto out;
+	}
+	if (in_softirq()) {
+		if (raw_irqs_disabled_flags(flags))
+			__udelay_disabled(usecs);
+		else
+			__udelay_enabled(usecs);
+		goto out;
+	}
+	if (raw_irqs_disabled_flags(flags)) {
+		local_bh_disable();
+		__udelay_disabled(usecs);
+		_local_bh_enable();
+		goto out;
+	}
+	__udelay_enabled(usecs);
+out:
 	local_irq_restore(flags);
+	preempt_enable();
 }

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ed92864..97f0d2b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -29,6 +29,7 @@
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
+	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
@@ -1020,7 +1021,7 @@
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA
+	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -1036,7 +1037,7 @@
 
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
-	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
+	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
@@ -1117,10 +1118,10 @@
 	  You can safely say Y even if your machine doesn't have MTRRs, you'll
 	  just add about 9 KB to your kernel.
 
-	  See <file:Documentation/mtrr.txt> for more information.
+	  See <file:Documentation/x86/mtrr.txt> for more information.
 
 config MTRR_SANITIZER
-	bool
+	def_bool y
 	prompt "MTRR cleanup support"
 	depends on MTRR
 	help
@@ -1131,7 +1132,7 @@
 	  The largest mtrr entry size for a continous block can be set with
 	  mtrr_chunk_size.
 
-	  If unsure, say N.
+	  If unsure, say Y.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
 	int "MTRR cleanup enable value (0-1)"
@@ -1191,7 +1192,6 @@
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
-	depends on PROC_FS
 	help
 	  This kernel feature is useful for number crunching applications
 	  that may need to compute untrusted bytecode during their
@@ -1199,7 +1199,7 @@
 	  the process as file descriptors supporting the read/write
 	  syscalls, it's possible to isolate those applications in
 	  their own address space using seccomp. Once seccomp is
-	  enabled via /proc/<pid>/seccomp, it cannot be disabled
+	  enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.
 
@@ -1356,14 +1356,14 @@
 	  Don't change this unless you know what you are doing.
 
 config HOTPLUG_CPU
-	bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && HOTPLUG && !X86_VOYAGER
 	---help---
-	  Say Y here to experiment with turning CPUs off and on, and to
-	  enable suspend on SMP systems. CPUs can be controlled through
-	  /sys/devices/system/cpu.
-	  Say N if you want to disable CPU hotplug and don't need to
-	  suspend.
+	  Say Y here to allow turning CPUs off and on. CPUs can be
+	  controlled through /sys/devices/system/cpu.
+	  ( Note: power management support will enable this option
+	    automatically on SMP systems. )
+	  Say N if you want to disable CPU hotplug.
 
 config COMPAT_VDSO
 	def_bool y
@@ -1378,6 +1378,51 @@
 
 	  If unsure, say Y.
 
+config CMDLINE_BOOL
+	bool "Built-in kernel command line"
+	default n
+	help
+	  Allow for specifying boot arguments to the kernel at
+	  build time.  On some systems (e.g. embedded ones), it is
+	  necessary or convenient to provide some or all of the
+	  kernel boot arguments with the kernel itself (that is,
+	  to not rely on the boot loader to provide them.)
+
+	  To compile command line arguments into the kernel,
+	  set this option to 'Y', then fill in
+	  the boot arguments in CONFIG_CMDLINE.
+
+	  Systems with fully functional boot loaders (i.e. non-embedded)
+	  should leave this option set to 'N'.
+
+config CMDLINE
+	string "Built-in kernel command string"
+	depends on CMDLINE_BOOL
+	default ""
+	help
+	  Enter arguments here that should be compiled into the kernel
+	  image and used at boot time.  If the boot loader provides a
+	  command line at boot time, it is appended to this string to
+	  form the full kernel command line, when the system boots.
+
+	  However, you can use the CONFIG_CMDLINE_OVERRIDE option to
+	  change this behavior.
+
+	  In most cases, the command line (whether built-in or provided
+	  by the boot loader) should specify the device for the root
+	  file system.
+
+config CMDLINE_OVERRIDE
+	bool "Built-in command line overrides boot loader arguments"
+	default n
+	depends on CMDLINE_BOOL
+	help
+	  Set this option to 'Y' to have the kernel ignore the boot loader
+	  command line, and use ONLY the built-in command line.
+
+	  This is used to work around broken boot loaders.  This should
+	  be set to 'N' under normal conditions.
+
 endmenu
 
 config ARCH_ENABLE_MEMORY_HOTPLUG
@@ -1773,7 +1818,7 @@
 
 config SYSVIPC_COMPAT
 	def_bool y
-	depends on X86_64 && COMPAT && SYSVIPC
+	depends on COMPAT && SYSVIPC
 
 endmenu
 

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b225219..60a8576 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu

@@ -418,3 +418,21 @@
 config X86_DEBUGCTLMSR
 	def_bool y
 	depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+
+config X86_DS
+	bool "Debug Store support"
+	default y
+	help
+	  Add support for Debug Store.
+	  This allows the kernel to provide a memory buffer to the hardware
+	  to store various profiling and tracing events.
+
+config X86_PTRACE_BTS
+	bool "ptrace interface to Branch Trace Store"
+	default y
+	depends on (X86_DS && X86_DEBUGCTLMSR)
+	help
+	  Add a ptrace interface to allow collecting an execution trace
+	  of the traced task.
+	  This collects control flow changes in a (cyclic) buffer and allows
+	  debuggers to fill in the gaps and show an execution trace of the debuggee.

diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index ba7736c..29c5fbf 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S

@@ -137,14 +137,15 @@
  */
 	movl output_len(%ebx), %eax
 	pushl %eax
+			# push arguments for decompress_kernel:
 	pushl %ebp	# output address
 	movl input_len(%ebx), %eax
 	pushl %eax	# input_len
 	leal input_data(%ebx), %eax
 	pushl %eax	# input_data
 	leal boot_heap(%ebx), %eax
-	pushl %eax	# heap area as third argument
-	pushl %esi	# real mode pointer as second arg
+	pushl %eax	# heap area
+	pushl %esi	# real mode pointer
 	call decompress_kernel
 	addl $20, %esp
 	popl %ecx

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9fea737..5780d36 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c

@@ -16,7 +16,7 @@
  */
 #undef CONFIG_PARAVIRT
 #ifdef CONFIG_X86_32
-#define _ASM_DESC_H_ 1
+#define ASM_X86__DESC_H 1
 #endif
 
 #ifdef CONFIG_X86_64
@@ -27,7 +27,7 @@
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
@@ -251,7 +251,7 @@
 				y--;
 			}
 		} else {
-			vidmem [(x + cols * y) * 2] = c;
+			vidmem[(x + cols * y) * 2] = c;
 			if (++x >= cols) {
 				x = 0;
 				if (++y >= lines) {
@@ -277,7 +277,8 @@
 	int i;
 	char *ss = s;
 
-	for (i = 0; i < n; i++) ss[i] = c;
+	for (i = 0; i < n; i++)
+		ss[i] = c;
 	return s;
 }
 
@@ -287,7 +288,8 @@
 	const char *s = src;
 	char *d = dest;
 
-	for (i = 0; i < n; i++) d[i] = s[i];
+	for (i = 0; i < n; i++)
+		d[i] = s[i];
 	return dest;
 }
 

diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index a1310c5..857e492 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c

@@ -492,7 +492,7 @@
 			continue;
 		}
 		sh_symtab = sec_symtab->symtab;
-		sym_strtab = sec->link->strtab;
+		sym_strtab = sec_symtab->link->strtab;
 		for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
 			Elf32_Rel *rel;
 			Elf32_Sym *sym;

diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index af86e43..b993062 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S

@@ -30,7 +30,6 @@
 SYSSIZE		= DEF_SYSSIZE		/* system size: # of 16-byte clicks */
 					/* to be loaded */
 ROOT_DEV	= 0			/* ROOT_DEV is now written by "build" */
-SWAP_DEV	= 0			/* SWAP_DEV is now written by "build" */
 
 #ifndef SVGA_MODE
 #define SVGA_MODE ASK_VGA

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 104275e..ef9a520 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig

@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc4
-# Mon Aug 25 15:04:00 2008
+# Linux kernel version: 2.6.27-rc5
+# Wed Sep  3 17:23:09 2008
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
@@ -202,7 +202,7 @@
 # CONFIG_M586 is not set
 # CONFIG_M586TSC is not set
 # CONFIG_M586MMX is not set
-# CONFIG_M686 is not set
+CONFIG_M686=y
 # CONFIG_MPENTIUMII is not set
 # CONFIG_MPENTIUMIII is not set
 # CONFIG_MPENTIUMM is not set
@@ -221,13 +221,14 @@
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_MVIAC7 is not set
 # CONFIG_MPSC is not set
-CONFIG_MCORE2=y
+# CONFIG_MCORE2 is not set
 # CONFIG_GENERIC_CPU is not set
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CPU=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_X86_XADD=y
+# CONFIG_X86_PPRO_FENCE is not set
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_INVLPG=y
 CONFIG_X86_BSWAP=y
@@ -235,14 +236,15 @@
 CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
 CONFIG_X86_TSC=y
+CONFIG_X86_CMOV=y
 CONFIG_X86_MINIMUM_CPU_FAMILY=4
 CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_HPET_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
 CONFIG_DMI=y
 # CONFIG_IOMMU_HELPER is not set
-CONFIG_NR_CPUS=4
-# CONFIG_SCHED_SMT is not set
+CONFIG_NR_CPUS=64
+CONFIG_SCHED_SMT=y
 CONFIG_SCHED_MC=y
 # CONFIG_PREEMPT_NONE is not set
 CONFIG_PREEMPT_VOLUNTARY=y
@@ -254,7 +256,8 @@
 # CONFIG_TOSHIBA is not set
 # CONFIG_I8K is not set
 CONFIG_X86_REBOOTFIXUPS=y
-# CONFIG_MICROCODE is not set
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 # CONFIG_NOHIGHMEM is not set
@@ -2115,7 +2118,7 @@
 CONFIG_DEFAULT_IO_DELAY_TYPE=0
 CONFIG_DEBUG_BOOT_PARAMS=y
 # CONFIG_CPA_DEBUG is not set
-# CONFIG_OPTIMIZE_INLINING is not set
+CONFIG_OPTIMIZE_INLINING=y
 
 #
 # Security options

diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 678c8ac..e620ea6 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig

@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.27-rc4
-# Mon Aug 25 14:40:46 2008
+# Linux kernel version: 2.6.27-rc5
+# Wed Sep  3 17:13:39 2008
 #
 CONFIG_64BIT=y
 # CONFIG_X86_32 is not set
@@ -218,17 +218,14 @@
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_MVIAC7 is not set
 # CONFIG_MPSC is not set
-CONFIG_MCORE2=y
-# CONFIG_GENERIC_CPU is not set
+# CONFIG_MCORE2 is not set
+CONFIG_GENERIC_CPU=y
 CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=64
-CONFIG_X86_INTERNODE_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_BYTES=128
+CONFIG_X86_INTERNODE_CACHE_BYTES=128
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_X86_WP_WORKS_OK=y
-CONFIG_X86_INTEL_USERCOPY=y
-CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_P6_NOP=y
 CONFIG_X86_TSC=y
 CONFIG_X86_CMPXCHG64=y
 CONFIG_X86_CMOV=y
@@ -243,9 +240,8 @@
 CONFIG_AMD_IOMMU=y
 CONFIG_SWIOTLB=y
 CONFIG_IOMMU_HELPER=y
-# CONFIG_MAXSMP is not set
-CONFIG_NR_CPUS=4
-# CONFIG_SCHED_SMT is not set
+CONFIG_NR_CPUS=64
+CONFIG_SCHED_SMT=y
 CONFIG_SCHED_MC=y
 # CONFIG_PREEMPT_NONE is not set
 CONFIG_PREEMPT_VOLUNTARY=y
@@ -254,7 +250,8 @@
 CONFIG_X86_IO_APIC=y
 # CONFIG_X86_MCE is not set
 # CONFIG_I8K is not set
-# CONFIG_MICROCODE is not set
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_NUMA=y
@@ -290,7 +287,7 @@
 CONFIG_VIRT_TO_BUS=y
 CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
-# CONFIG_X86_PAT is not set
+CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
@@ -2089,7 +2086,7 @@
 CONFIG_DEFAULT_IO_DELAY_TYPE=0
 CONFIG_DEBUG_BOOT_PARAMS=y
 # CONFIG_CPA_DEBUG is not set
-# CONFIG_OPTIMIZE_INLINING is not set
+CONFIG_OPTIMIZE_INLINING=y
 
 #
 # Security options

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a0e1dbe..127ec3f 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c

@@ -85,8 +85,10 @@
 	dump->regs.ax = regs->ax;
 	dump->regs.ds = current->thread.ds;
 	dump->regs.es = current->thread.es;
-	asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
-	asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
+	savesegment(fs, fs);
+	dump->regs.fs = fs;
+	savesegment(gs, gs);
+	dump->regs.gs = gs;
 	dump->regs.orig_ax = regs->orig_ax;
 	dump->regs.ip = regs->ip;
 	dump->regs.cs = regs->cs;
@@ -430,8 +432,9 @@
 	current->mm->start_stack =
 		(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
 	/* start thread */
-	asm volatile("movl %0,%%fs" :: "r" (0)); \
-	asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
+	loadsegment(fs, 0);
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 	load_gs_index(0);
 	(regs)->ip = ex.a_entry;
 	(regs)->sp = current->mm->start_stack;

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 20af4c7..f1a2ac7 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c

@@ -206,7 +206,7 @@
 	{ unsigned int cur;						\
 	  unsigned short pre;						\
 	  err |= __get_user(pre, &sc->seg);				\
-	  asm volatile("movl %%" #seg ",%0" : "=r" (cur));		\
+	  savesegment(seg, cur);					\
 	  pre |= mask;							\
 	  if (pre != cur) loadsegment(seg, pre); }
 
@@ -235,7 +235,7 @@
 	 */
 	err |= __get_user(gs, &sc->gs);
 	gs |= 3;
-	asm("movl %%gs,%0" : "=r" (oldgs));
+	savesegment(gs, oldgs);
 	if (gs != oldgs)
 		load_gs_index(gs);
 
@@ -355,14 +355,13 @@
 {
 	int tmp, err = 0;
 
-	tmp = 0;
-	__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(gs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-	__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(fs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
-	__asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(ds, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
-	__asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(es, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->es);
 
 	err |= __put_user((u32)regs->di, &sc->di);
@@ -498,8 +497,8 @@
 	regs->dx = 0;
 	regs->cx = 0;
 
-	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
-	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 
 	regs->cs = __USER32_CS;
 	regs->ss = __USER32_DS;
@@ -591,8 +590,8 @@
 	regs->dx = (unsigned long) &frame->info;
 	regs->cx = (unsigned long) &frame->uc;
 
-	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
-	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 
 	regs->cs = __USER32_CS;
 	regs->ss = __USER32_DS;

diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index d3c6408..beda423 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c

@@ -556,15 +556,6 @@
 	return ret;
 }
 
-/* These are here just in case some old ia32 binary calls it. */
-asmlinkage long sys32_pause(void)
-{
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	return -ERESTARTNOHAND;
-}
-
-
 #ifdef CONFIG_SYSCTL_SYSCALL
 struct sysctl_ia32 {
 	unsigned int	name;

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bfd10fd..7d40ef7 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c

@@ -58,7 +58,6 @@
 #ifdef	CONFIG_X86_64
 
 #include <asm/proto.h>
-#include <asm/genapic.h>
 
 #else				/* X86 */
 
@@ -97,8 +96,6 @@
 #warning ACPI uses CMPXCHG, i486 and later hardware
 #endif
 
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -160,6 +157,8 @@
 struct acpi_mcfg_allocation *pci_mmcfg_config;
 int pci_mmcfg_config_num;
 
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
 static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
 {
 	if (!strcmp(mcfg->header.oem_id, "SGI"))
@@ -1605,6 +1604,14 @@
 	 */
 	{
 	 .callback = dmi_ignore_irq0_timer_override,
+	 .ident = "HP nx6115 laptop",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"),
+		     },
+	 },
+	{
+	 .callback = dmi_ignore_irq0_timer_override,
 	 .ident = "HP NX6125 laptop",
 	 .matches = {
 		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1619,6 +1626,14 @@
 		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
 		     },
 	 },
+	{
+	 .callback = dmi_ignore_irq0_timer_override,
+	 .ident = "HP 6715b laptop",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
+		     },
+	 },
 	{}
 };
 

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65a0c1b..fb04e49 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c

@@ -231,25 +231,25 @@
 			continue;
 		if (*ptr > text_end)
 			continue;
-		text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
+		/* turn DS segment override prefix into lock prefix */
+		text_poke(*ptr, ((unsigned char []){0xf0}), 1);
 	};
 }
 
 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 {
 	u8 **ptr;
-	char insn[1];
 
 	if (noreplace_smp)
 		return;
 
-	add_nops(insn, 1);
 	for (ptr = start; ptr < end; ptr++) {
 		if (*ptr < text)
 			continue;
 		if (*ptr > text_end)
 			continue;
-		text_poke(*ptr, insn, 1);
+		/* turn lock prefix into DS segment override prefix */
+		text_poke(*ptr, ((unsigned char []){0x3E}), 1);
 	};
 }
 

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 44e2182..9a32b37 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c

@@ -455,11 +455,11 @@
 		   force_iommu ||
 		   valid_agp ||
 		   fallback_aper_force) {
-		printk(KERN_ERR
+		printk(KERN_INFO
 			"Your BIOS doesn't leave a aperture memory hole\n");
-		printk(KERN_ERR
+		printk(KERN_INFO
 			"Please enable the IOMMU option in the BIOS setup\n");
-		printk(KERN_ERR
+		printk(KERN_INFO
 			"This costs you %d MB of RAM\n",
 				32 << fallback_aper_order);
 

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 732d1f4..5145a6e 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c

@@ -228,7 +228,6 @@
 #include <linux/suspend.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
-#include <linux/smp_lock.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>

diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387..505543a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c

@@ -22,7 +22,7 @@
 
 #define __NO_STUBS 1
 #undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #define __SYSCALL(nr, sym) [nr] = 1,
 static char syscalls[] = {
 #include <asm/unistd.h>

diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index c639bd5..fdd585f 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c

@@ -25,11 +25,11 @@
 {
 	const char *str;
 	switch (status) {
-	case  0: str = "Call completed without error"; break;
-	case -1: str = "Not implemented"; break;
-	case -2: str = "Invalid argument"; break;
-	case -3: str = "Call completed with error"; break;
-	default: str = "Unknown BIOS status code"; break;
+	case  0: str = "Call completed without error";	break;
+	case -1: str = "Not implemented";		break;
+	case -2: str = "Invalid argument";		break;
+	case -3: str = "Call completed with error";	break;
+	default: str = "Unknown BIOS status code";	break;
 	}
 	return str;
 }

diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index a11f5d4..305b465 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c

@@ -430,6 +430,49 @@
 }
 __setup("noclflush", setup_noclflush);
 
+struct msr_range {
+	unsigned min;	/* first MSR index print_cpu_msr() tries to read */
+	unsigned max;	/* upper bound; print_cpu_msr() stops before it */
+};
+
+static struct msr_range msr_range_array[] __cpuinitdata = {
+	{ 0x00000000, 0x00000418},	/* entries are { min, max } */
+	{ 0xc0000000, 0xc000040b},
+	{ 0xc0010000, 0xc0010142},
+	{ 0xc0011000, 0xc001103b},
+};
+
+static void __cpuinit print_cpu_msr(void)	/* log MSRs from msr_range_array */
+{
+	unsigned index;
+	u64 val;
+	int i;
+	unsigned index_min, index_max;
+
+	for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
+		index_min = msr_range_array[i].min;
+		index_max = msr_range_array[i].max;	/* exclusive bound */
+		for (index = index_min; index < index_max; index++) {
+			if (rdmsrl_amd_safe(index, &val))	/* nonzero: skip */
+				continue;
+			printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+		}
+	}
+}
+
+static int show_msr __cpuinitdata;	/* SMP: dump MSRs if cpu_index < show_msr */
+/* Parse "show_msr=<n>"; a missing or non-positive n leaves show_msr as is. */
+static __init int setup_show_msr(char *arg)
+{
+	int num = 0;	/* get_option() may return 0 without storing a value */
+
+	if (get_option(&arg, &num) && num > 0)
+		show_msr = num;
+
+	return 1;
+}
+__setup("show_msr=", setup_show_msr);
+
 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 {
 	if (c->x86_model_id[0])
@@ -439,6 +482,14 @@
 		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
 	else
 		printk(KERN_CONT "\n");
+
+#ifdef CONFIG_SMP
+	if (c->cpu_index < show_msr)
+		print_cpu_msr();
+#else
+	if (show_msr)
+		print_cpu_msr();
+#endif
 }
 
 static __init int setup_disablecpuid(char *arg)

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index f1685fb..b8e05ee 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c

@@ -171,7 +171,7 @@
 	}
 
 	if (c->x86 != 0xF) {
-		printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n");
+		printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n");
 		return 0;
 	}
 

diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 15e13c0..3b5f064 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c

@@ -26,7 +26,7 @@
 #include <asm/cpufeature.h>
 
 #define PFX		"speedstep-centrino: "
-#define MAINTAINER	"cpufreq@lists.linux.org.uk"
+#define MAINTAINER	"cpufreq@vger.kernel.org"
 
 #define dprintk(msg...) \
 	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b75f256..f113ef4 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c

@@ -222,10 +222,11 @@
 			set_cpu_cap(c, X86_FEATURE_BTS);
 		if (!(l1 & (1<<12)))
 			set_cpu_cap(c, X86_FEATURE_PEBS);
+		ds_init_intel(c);
 	}
 
 	if (cpu_has_bts)
-		ds_init_intel(c);
+		ptrace_bts_init_intel(c);
 
 	/*
 	 * See if we have a good local APIC by checking for buggy Pentia,

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index cb7d3b6..4e8d77f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c

@@ -401,12 +401,7 @@
 		tmp |= ~((1<<(hi - 1)) - 1);
 
 		if (tmp != mask_lo) {
-			static int once = 1;
-
-			if (once) {
-				printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
-				once = 0;
-			}
+			WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
 			mask_lo = tmp;
 		}
 	}

diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 84c480b..4c42146 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c

@@ -405,9 +405,9 @@
 			}
 			/* RED-PEN: base can be > 32bit */ 
 			len += seq_printf(seq, 
-				   "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
+				   "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
 			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
-			     mtrr_attrib_to_str(type), mtrr_usage_table[i]);
+			     mtrr_usage_table[i], mtrr_attrib_to_str(type));
 		}
 	}
 	return 0;

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 885c826..c78c048 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c

@@ -729,7 +729,7 @@
 	mtrr_type type;
 };
 
-struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
 static int __initdata debug_print;
 
 static int __init
@@ -759,7 +759,8 @@
 	/* take out UC ranges */
 	for (i = 0; i < num_var_ranges; i++) {
 		type = range_state[i].type;
-		if (type != MTRR_TYPE_UNCACHABLE)
+		if (type != MTRR_TYPE_UNCACHABLE &&
+		    type != MTRR_TYPE_WRPROT)
 			continue;
 		size = range_state[i].size_pfn;
 		if (!size)
@@ -836,6 +837,13 @@
 }
 early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
 
+static int __init mtrr_cleanup_debug_setup(char *str)	/* str is unused */
+{
+	debug_print = 1;	/* enable the debug_print-gated printk()s */
+	return 0;
+}
+early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
+
 struct var_mtrr_state {
 	unsigned long	range_startk;
 	unsigned long	range_sizek;
@@ -898,6 +906,27 @@
 	}
 }
 
+/* Express sizek (in KB) in the largest unit of K/M/G that divides it:
+ * returns the scaled value and stores the unit letter in *factorp. */
+static unsigned long to_size_factor(unsigned long sizek, char *factorp)
+{
+	char factor;
+	unsigned long base = sizek;
+
+	if (base & ((1<<10) - 1)) {
+		/* not MB alignment */
+		factor = 'K';
+	} else if (base & ((1<<20) - 1)){
+		factor = 'M';	/* whole MB but not whole GB */
+		base >>= 10;
+	} else {
+		factor = 'G';	/* whole GB (sizek == 0 also lands here) */
+		base >>= 20;
+	}
+	*factorp = factor;
+	return base;
+}
+
 static unsigned int __init
 range_to_mtrr(unsigned int reg, unsigned long range_startk,
 	      unsigned long range_sizek, unsigned char type)
@@ -919,13 +948,21 @@
 			align = max_align;
 
 		sizek = 1 << align;
-		if (debug_print)
+		if (debug_print) {
+			char start_factor = 'K', size_factor = 'K';
+			unsigned long start_base, size_base;
+
+			start_base = to_size_factor(range_startk, &start_factor),
+			size_base = to_size_factor(sizek, &size_factor),
+
 			printk(KERN_DEBUG "Setting variable MTRR %d, "
-				"base: %ldMB, range: %ldMB, type %s\n",
-				reg, range_startk >> 10, sizek >> 10,
+				"base: %ld%cB, range: %ld%cB, type %s\n",
+				reg, start_base, start_factor,
+				size_base, size_factor,
 				(type == MTRR_TYPE_UNCACHABLE)?"UC":
 				    ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
 				);
+		}
 		save_var_mtrr(reg++, range_startk, sizek, type);
 		range_startk += sizek;
 		range_sizek -= sizek;
@@ -970,6 +1007,8 @@
 	/* try to append some small hole */
 	range0_basek = state->range_startk;
 	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+
+	/* no increase */
 	if (range0_sizek == state->range_sizek) {
 		if (debug_print)
 			printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
@@ -980,13 +1019,40 @@
 		return 0;
 	}
 
-	range0_sizek -= chunk_sizek;
-	if (range0_sizek && sizek) {
-	    while (range0_basek + range0_sizek > (basek + sizek)) {
-		range0_sizek -= chunk_sizek;
-		if (!range0_sizek)
-			break;
-	    }
+	/* only cut back, when it is not the last */
+	if (sizek) {
+		while (range0_basek + range0_sizek > (basek + sizek)) {
+			if (range0_sizek >= chunk_sizek)
+				range0_sizek -= chunk_sizek;
+			else
+				range0_sizek = 0;
+
+			if (!range0_sizek)
+				break;
+		}
+	}
+
+second_try:
+	range_basek = range0_basek + range0_sizek;
+
+	/* one hole in the middle */
+	if (range_basek > basek && range_basek <= (basek + sizek))
+		second_sizek = range_basek - basek;
+
+	if (range0_sizek > state->range_sizek) {
+
+		/* one hole in middle or at end */
+		hole_sizek = range0_sizek - state->range_sizek - second_sizek;
+
+		/* hole size should be less than half of range0 size */
+		if (hole_sizek >= (range0_sizek >> 1) &&
+		    range0_sizek >= chunk_sizek) {
+			range0_sizek -= chunk_sizek;
+			second_sizek = 0;
+			hole_sizek = 0;
+
+			goto second_try;
+		}
 	}
 
 	if (range0_sizek) {
@@ -996,50 +1062,28 @@
 				(range0_basek + range0_sizek)<<10);
 		state->reg = range_to_mtrr(state->reg, range0_basek,
 				range0_sizek, MTRR_TYPE_WRBACK);
-
 	}
 
-	range_basek = range0_basek + range0_sizek;
-	range_sizek = chunk_sizek;
-
-	if (range_basek + range_sizek > basek &&
-	    range_basek + range_sizek <= (basek + sizek)) {
-		/* one hole */
-		second_basek = basek;
-		second_sizek = range_basek + range_sizek - basek;
-	}
-
-	/* if last piece, only could one hole near end */
-	if ((second_basek || !basek) &&
-	    range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
-	    (chunk_sizek >> 1)) {
-		/*
-		 * one hole in middle (second_sizek is 0) or at end
-		 * (second_sizek is 0 )
-		 */
-		hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
-				 - second_sizek;
-		hole_basek = range_basek + range_sizek - hole_sizek
-				 - second_sizek;
-	} else {
-		/* fallback for big hole, or several holes */
+	if (range0_sizek < state->range_sizek) {
+		/* need to handle left over */
 		range_sizek = state->range_sizek - range0_sizek;
-		second_basek = 0;
-		second_sizek = 0;
+
+		if (debug_print)
+			printk(KERN_DEBUG "range: %016lx - %016lx\n",
+				 range_basek<<10,
+				 (range_basek + range_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, range_basek,
+				 range_sizek, MTRR_TYPE_WRBACK);
 	}
 
-	if (debug_print)
-		printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
-			 (range_basek + range_sizek)<<10);
-	state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
-					 MTRR_TYPE_WRBACK);
 	if (hole_sizek) {
+		hole_basek = range_basek - hole_sizek - second_sizek;
 		if (debug_print)
 			printk(KERN_DEBUG "hole: %016lx - %016lx\n",
-				 hole_basek<<10, (hole_basek + hole_sizek)<<10);
-		state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
-						 MTRR_TYPE_UNCACHABLE);
-
+				 hole_basek<<10,
+				 (hole_basek + hole_sizek)<<10);
+		state->reg = range_to_mtrr(state->reg, hole_basek,
+				 hole_sizek, MTRR_TYPE_UNCACHABLE);
 	}
 
 	return second_sizek;
@@ -1154,11 +1198,11 @@
 };
 
 /*
- * gran_size: 1M, 2M, ..., 2G
- * chunk size: gran_size, ..., 4G
- * so we need (2+13)*6
+ * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
+ * chunk size: gran_size, ..., 2G
+ * so we need (1+16)*8
  */
-#define NUM_RESULT	90
+#define NUM_RESULT	136
 #define PSHIFT		(PAGE_SHIFT - 10)
 
 static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
@@ -1168,13 +1212,14 @@
 static int __init mtrr_cleanup(unsigned address_bits)
 {
 	unsigned long extra_remove_base, extra_remove_size;
-	unsigned long i, base, size, def, dummy;
+	unsigned long base, size, def, dummy;
 	mtrr_type type;
 	int nr_range, nr_range_new;
 	u64 chunk_size, gran_size;
 	unsigned long range_sums, range_sums_new;
 	int index_good;
 	int num_reg_good;
+	int i;
 
 	/* extra one for all 0 */
 	int num[MTRR_NUM_TYPES + 1];
@@ -1204,6 +1249,8 @@
 			continue;
 		if (!size)
 			type = MTRR_NUM_TYPES;
+		if (type == MTRR_TYPE_WRPROT)
+			type = MTRR_TYPE_UNCACHABLE;
 		num[type]++;
 	}
 
@@ -1216,23 +1263,57 @@
 		num_var_ranges - num[MTRR_NUM_TYPES])
 		return 0;
 
+	/* print original var MTRRs at first, for debugging: */
+	printk(KERN_DEBUG "original variable MTRRs\n");
+	for (i = 0; i < num_var_ranges; i++) {
+		char start_factor = 'K', size_factor = 'K';
+		unsigned long start_base, size_base;
+
+		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+		if (!size_base)
+			continue;
+
+		size_base = to_size_factor(size_base, &size_factor),
+		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+		start_base = to_size_factor(start_base, &start_factor),
+		type = range_state[i].type;
+
+		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+			i, start_base, start_factor,
+			size_base, size_factor,
+			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
+			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+			);
+	}
+
 	memset(range, 0, sizeof(range));
 	extra_remove_size = 0;
-	if (mtrr_tom2) {
-		extra_remove_base = 1 << (32 - PAGE_SHIFT);
+	extra_remove_base = 1 << (32 - PAGE_SHIFT);
+	if (mtrr_tom2)
 		extra_remove_size =
 			(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
-	}
 	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
 					  extra_remove_size);
+	/*
+	 * [0, 1M) should always be covered by var mtrr with WB
+	 * and fixed mtrrs should take effect before var mtrr for it
+	 */
+	nr_range = add_range_with_merge(range, nr_range, 0,
+					(1ULL<<(20 - PAGE_SHIFT)) - 1);
+	/* sort the ranges */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
 	range_sums = sum_ranges(range, nr_range);
 	printk(KERN_INFO "total RAM coverred: %ldM\n",
 	       range_sums >> (20 - PAGE_SHIFT));
 
 	if (mtrr_chunk_size && mtrr_gran_size) {
 		int num_reg;
+		char gran_factor, chunk_factor, lose_factor;
+		unsigned long gran_base, chunk_base, lose_base;
 
-		debug_print = 1;
+		debug_print++;
 		/* convert ranges to var ranges state */
 		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
 					      mtrr_gran_size);
@@ -1256,34 +1337,48 @@
 			result[i].lose_cover_sizek =
 				(range_sums - range_sums_new) << PSHIFT;
 
-		printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
-			 result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
-			 result[i].chunk_sizek >> 10);
-		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+		printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+			 result[i].bad?"*BAD*":" ",
+			 gran_base, gran_factor, chunk_base, chunk_factor);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
 			 result[i].num_reg, result[i].bad?"-":"",
-			 result[i].lose_cover_sizek >> 10);
+			 lose_base, lose_factor);
 		if (!result[i].bad) {
 			set_var_mtrr_all(address_bits);
 			return 1;
 		}
 		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
 		       "will find optimal one\n");
-		debug_print = 0;
+		debug_print--;
 		memset(result, 0, sizeof(result[0]));
 	}
 
 	i = 0;
 	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
 	memset(result, 0, sizeof(result));
-	for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
-		for (chunk_size = gran_size; chunk_size < (1ULL<<33);
+	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
+		char gran_factor;
+		unsigned long gran_base;
+
+		if (debug_print)
+			gran_base = to_size_factor(gran_size >> 10, &gran_factor);
+
+		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
 		     chunk_size <<= 1) {
 			int num_reg;
 
-			if (debug_print)
-				printk(KERN_INFO
-			       "\ngran_size: %lldM   chunk_size_size: %lldM\n",
-				       gran_size >> 20, chunk_size >> 20);
+			if (debug_print) {
+				char chunk_factor;
+				unsigned long chunk_base;
+
+				chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
+				printk(KERN_INFO "\n");
+				printk(KERN_INFO "gran_size: %ld%c   chunk_size: %ld%c \n",
+				       gran_base, gran_factor, chunk_base, chunk_factor);
+			}
 			if (i >= NUM_RESULT)
 				continue;
 
@@ -1326,12 +1421,18 @@
 
 	/* print out all */
 	for (i = 0; i < NUM_RESULT; i++) {
-		printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
-		       result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
-		       result[i].chunk_sizek >> 10);
-		printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
-		       result[i].num_reg, result[i].bad?"-":"",
-		       result[i].lose_cover_sizek >> 10);
+		char gran_factor, chunk_factor, lose_factor;
+		unsigned long gran_base, chunk_base, lose_base;
+
+		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+		printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+			 result[i].bad?"*BAD*":" ",
+			 gran_base, gran_factor, chunk_base, chunk_factor);
+		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
+			 result[i].num_reg, result[i].bad?"-":"",
+			 lose_base, lose_factor);
 	}
 
 	/* try to find the optimal index */
@@ -1339,10 +1440,8 @@
 		nr_mtrr_spare_reg = num_var_ranges - 1;
 	num_reg_good = -1;
 	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
-		if (!min_loss_pfn[i]) {
+		if (!min_loss_pfn[i])
 			num_reg_good = i;
-			break;
-		}
 	}
 
 	index_good = -1;
@@ -1358,21 +1457,26 @@
 	}
 
 	if (index_good != -1) {
+		char gran_factor, chunk_factor, lose_factor;
+		unsigned long gran_base, chunk_base, lose_base;
+
 		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
 		i = index_good;
-		printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
-				result[i].gran_sizek >> 10,
-				result[i].chunk_sizek >> 10);
-		printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
-				result[i].num_reg,
-				result[i].lose_cover_sizek >> 10);
+		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+		printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
+			 gran_base, gran_factor, chunk_base, chunk_factor);
+		printk(KERN_CONT "num_reg: %d  \tlose RAM: %ld%c\n",
+			 result[i].num_reg, lose_base, lose_factor);
 		/* convert ranges to var ranges state */
 		chunk_size = result[i].chunk_sizek;
 		chunk_size <<= 10;
 		gran_size = result[i].gran_sizek;
 		gran_size <<= 10;
-		debug_print = 1;
+		debug_print++;
 		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+		debug_print--;
 		set_var_mtrr_all(address_bits);
 		return 1;
 	}

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 05cc22d..6bff382 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c

@@ -295,13 +295,19 @@
 	/* setup the timer */
 	wrmsr(evntsel_msr, evntsel, 0);
 	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
+
+	/* initialize the wd struct before enabling */
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  /* unused */
+
+	/* ok, everything is initialized, announce that we're set */
+	cpu_nmi_set_wd_enabled();
+
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
 	wrmsr(evntsel_msr, evntsel, 0);
 
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
 	return 1;
 }
 
@@ -379,13 +385,19 @@
 	wrmsr(evntsel_msr, evntsel, 0);
 	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
 	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
+
+	/* initialize the wd struct before enabling */
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  /* unused */
+
+	/* ok, everything is initialized, announce that we're set */
+	cpu_nmi_set_wd_enabled();
+
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= P6_EVNTSEL0_ENABLE;
 	wrmsr(evntsel_msr, evntsel, 0);
 
-	wd->perfctr_msr = perfctr_msr;
-	wd->evntsel_msr = evntsel_msr;
-	wd->cccr_msr = 0;  /* unused */
 	return 1;
 }
 
@@ -432,6 +444,27 @@
 #define P4_CCCR_ENABLE		(1 << 12)
 #define P4_CCCR_OVF 		(1 << 31)
 
+#define P4_CONTROLS 18
+static unsigned int p4_controls[18] = {
+	MSR_P4_BPU_CCCR0,
+	MSR_P4_BPU_CCCR1,
+	MSR_P4_BPU_CCCR2,
+	MSR_P4_BPU_CCCR3,
+	MSR_P4_MS_CCCR0,
+	MSR_P4_MS_CCCR1,
+	MSR_P4_MS_CCCR2,
+	MSR_P4_MS_CCCR3,
+	MSR_P4_FLAME_CCCR0,
+	MSR_P4_FLAME_CCCR1,
+	MSR_P4_FLAME_CCCR2,
+	MSR_P4_FLAME_CCCR3,
+	MSR_P4_IQ_CCCR0,
+	MSR_P4_IQ_CCCR1,
+	MSR_P4_IQ_CCCR2,
+	MSR_P4_IQ_CCCR3,
+	MSR_P4_IQ_CCCR4,
+	MSR_P4_IQ_CCCR5,
+};
 /*
  * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
  * CRU_ESCR0 (with any non-null event selector) through a complemented
@@ -473,6 +506,26 @@
 		evntsel_msr = MSR_P4_CRU_ESCR0;
 		cccr_msr = MSR_P4_IQ_CCCR0;
 		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+
+		/*
+		 * If we're on the kdump kernel or in a similar situation, we may
+		 * still have other performance counter registers set to
+		 * interrupt and they'll keep interrupting forever because
+		 * of the P4_CCCR_OVF quirk. So we need to ACK all the
+		 * pending interrupts and disable all the registers here,
+		 * before reenabling the NMI delivery. Refer to p4_rearm()
+		 * about the P4_CCCR_OVF quirk.
+		 */
+		if (reset_devices) {
+			unsigned int low, high;
+			int i;
+
+			for (i = 0; i < P4_CONTROLS; i++) {
+				rdmsr(p4_controls[i], low, high);
+				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
+				wrmsr(p4_controls[i], low, high);
+			}
+		}
 	} else {
 		/* logical cpu 1 */
 		perfctr_msr = MSR_P4_IQ_PERFCTR1;
@@ -499,12 +552,17 @@
 	wrmsr(evntsel_msr, evntsel, 0);
 	wrmsr(cccr_msr, cccr_val, 0);
 	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	cccr_val |= P4_CCCR_ENABLE;
-	wrmsr(cccr_msr, cccr_val, 0);
+
 	wd->perfctr_msr = perfctr_msr;
 	wd->evntsel_msr = evntsel_msr;
 	wd->cccr_msr = cccr_msr;
+
+	/* ok, everything is initialized, announce that we're set */
+	cpu_nmi_set_wd_enabled();
+
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	cccr_val |= P4_CCCR_ENABLE;
+	wrmsr(cccr_msr, cccr_val, 0);
 	return 1;
 }
 
@@ -620,13 +678,17 @@
 	wrmsr(evntsel_msr, evntsel, 0);
 	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
 	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsr(evntsel_msr, evntsel, 0);
 
 	wd->perfctr_msr = perfctr_msr;
 	wd->evntsel_msr = evntsel_msr;
 	wd->cccr_msr = 0;  /* unused */
+
+	/* ok, everything is initialized, announce that we're set */
+	cpu_nmi_set_wd_enabled();
+
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsr(evntsel_msr, evntsel, 0);
 	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
 }

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 8e9cd6a..6a44d64 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c

@@ -36,7 +36,6 @@
 #include <linux/smp_lock.h>
 #include <linux/major.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>

diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 15e6c6b..e90a60e 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c

@@ -7,9 +7,8 @@
 
 #include <linux/errno.h>
 #include <linux/crash_dump.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
 /**
  * copy_oldmem_page - copy one page from "oldmem"
@@ -25,7 +24,7 @@
  * in the current kernel. We stitch up a pte, similar to kmap_atomic.
  */
 ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-                               size_t csize, unsigned long offset, int userbuf)
+		size_t csize, unsigned long offset, int userbuf)
 {
 	void  *vaddr;
 
@@ -33,14 +32,16 @@
 		return 0;
 
 	vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (!vaddr)
+		return -ENOMEM;
 
 	if (userbuf) {
-		if (copy_to_user(buf, (vaddr + offset), csize)) {
+		if (copy_to_user(buf, vaddr + offset, csize)) {
 			iounmap(vaddr);
 			return -EFAULT;
 		}
 	} else
-	memcpy(buf, (vaddr + offset), csize);
+		memcpy(buf, vaddr + offset, csize);
 
 	iounmap(vaddr);
 	return csize;

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 11c11b8..2b69994 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c

@@ -2,26 +2,49 @@
  * Debug Store support
  *
  * This provides a low-level interface to the hardware's Debug Store
- * feature that is used for last branch recording (LBR) and
+ * feature that is used for branch trace store (BTS) and
  * precise-event based sampling (PEBS).
  *
- * Different architectures use a different DS layout/pointer size.
- * The below functions therefore work on a void*.
+ * It manages:
+ * - per-thread and per-cpu allocation of BTS and PEBS
+ * - buffer memory allocation (optional)
+ * - buffer overflow handling
+ * - buffer access
+ *
+ * It assumes:
+ * - get_task_struct on all parameter tasks
+ * - current is allowed to trace parameter tasks
  *
  *
- * Since there is no user for PEBS, yet, only LBR (or branch
- * trace store, BTS) is supported.
- *
- *
- * Copyright (C) 2007 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ * Copyright (C) 2007-2008 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
  */
 
+
+#ifdef CONFIG_X86_DS
+
 #include <asm/ds.h>
 
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+
+/*
+ * The configuration for a particular DS hardware implementation.
+ */
+struct ds_configuration {
+	/* the size of the DS structure in bytes */
+	unsigned char  sizeof_ds;
+	/* the size of one pointer-typed field in the DS structure in bytes;
+	   this covers the first 8 fields related to buffer management. */
+	unsigned char  sizeof_field;
+	/* the size of a BTS/PEBS record in bytes */
+	unsigned char  sizeof_rec[2];
+};
+static struct ds_configuration ds_cfg;
 
 
 /*
@@ -44,378 +67,747 @@
  *   (interrupt occurs when write pointer passes interrupt pointer)
  * - value to which counter is reset following counter overflow
  *
- * On later architectures, the last branch recording hardware uses
- * 64bit pointers even in 32bit mode.
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
  *
  *
- * Branch Trace Store (BTS) records store information about control
- * flow changes. They at least provide the following information:
- * - source linear address
- * - destination linear address
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ * - an offset giving the start of the respective region
  *
- * Netburst supported a predicated bit that had been dropped in later
- * architectures. We do not suppor it.
+ * This offset is further used to index various arrays holding
+ * information for BTS and PEBS at the respective index.
  *
- *
- * In order to abstract from the actual DS and BTS layout, we describe
- * the access to the relevant fields.
- * Thanks to Andi Kleen for proposing this design.
- *
- * The implementation, however, is not as general as it might seem. In
- * order to stay somewhat simple and efficient, we assume an
- * underlying unsigned type (mostly a pointer type) and we expect the
- * field to be at least as big as that type.
+ * On later 32bit processors, we only access the lower 32bit of the
+ * 64bit pointer fields. The upper halves will be zeroed out.
  */
 
-/*
- * A special from_ip address to indicate that the BTS record is an
- * info record that needs to be interpreted or skipped.
- */
-#define BTS_ESCAPE_ADDRESS (-1)
-
-/*
- * A field access descriptor
- */
-struct access_desc {
-	unsigned char offset;
-	unsigned char size;
+enum ds_field {
+	ds_buffer_base = 0,
+	ds_index,
+	ds_absolute_maximum,
+	ds_interrupt_threshold,
 };
 
-/*
- * The configuration for a particular DS/BTS hardware implementation.
- */
-struct ds_configuration {
-	/* the DS configuration */
-	unsigned char  sizeof_ds;
-	struct access_desc bts_buffer_base;
-	struct access_desc bts_index;
-	struct access_desc bts_absolute_maximum;
-	struct access_desc bts_interrupt_threshold;
-	/* the BTS configuration */
-	unsigned char  sizeof_bts;
-	struct access_desc from_ip;
-	struct access_desc to_ip;
-	/* BTS variants used to store additional information like
-	   timestamps */
-	struct access_desc info_type;
-	struct access_desc info_data;
-	unsigned long debugctl_mask;
+enum ds_qualifier {
+	ds_bts  = 0,
+	ds_pebs
 };
 
-/*
- * The global configuration used by the below accessor functions
- */
-static struct ds_configuration ds_cfg;
+static inline unsigned long ds_get(const unsigned char *base,
+				   enum ds_qualifier qual, enum ds_field field)
+{
+	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	return *(unsigned long *)base;
+}
+
+static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
+			  enum ds_field field, unsigned long value)
+{
+	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+	(*(unsigned long *)base) = value;
+}
+
 
 /*
- * Accessor functions for some DS and BTS fields using the above
- * global ptrace_bts_cfg.
+ * Locking is done only for allocating BTS or PEBS resources and for
+ * guarding context and buffer memory allocation.
+ *
+ * Most functions require the current task to own the ds context part
+ * they are going to access. All the locking is done when validating
+ * access to the context.
  */
-static inline unsigned long get_bts_buffer_base(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset);
-}
-static inline void set_bts_buffer_base(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value;
-}
-static inline unsigned long get_bts_index(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.bts_index.offset);
-}
-static inline void set_bts_index(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value;
-}
-static inline unsigned long get_bts_absolute_maximum(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset);
-}
-static inline void set_bts_absolute_maximum(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value;
-}
-static inline unsigned long get_bts_interrupt_threshold(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset);
-}
-static inline void set_bts_interrupt_threshold(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value;
-}
-static inline unsigned long get_from_ip(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.from_ip.offset);
-}
-static inline void set_from_ip(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value;
-}
-static inline unsigned long get_to_ip(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.to_ip.offset);
-}
-static inline void set_to_ip(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value;
-}
-static inline unsigned char get_info_type(char *base)
-{
-	return *(unsigned char *)(base + ds_cfg.info_type.offset);
-}
-static inline void set_info_type(char *base, unsigned char value)
-{
-	(*(unsigned char *)(base + ds_cfg.info_type.offset)) = value;
-}
-static inline unsigned long get_info_data(char *base)
-{
-	return *(unsigned long *)(base + ds_cfg.info_data.offset);
-}
-static inline void set_info_data(char *base, unsigned long value)
-{
-	(*(unsigned long *)(base + ds_cfg.info_data.offset)) = value;
-}
+static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
 
-
-int ds_allocate(void **dsp, size_t bts_size_in_bytes)
+/*
+ * Validate that the current task is allowed to access the BTS/PEBS
+ * buffer of the parameter task.
+ *
+ * Returns 0, if access is granted; -Eerrno, otherwise.
+ */
+static inline int ds_validate_access(struct ds_context *context,
+				     enum ds_qualifier qual)
 {
-	size_t bts_size_in_records;
-	unsigned long bts;
-	void *ds;
+	if (!context)
+		return -EPERM;
 
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-		return -EOPNOTSUPP;
-
-	if (bts_size_in_bytes < 0)
-		return -EINVAL;
-
-	bts_size_in_records =
-		bts_size_in_bytes / ds_cfg.sizeof_bts;
-	bts_size_in_bytes =
-		bts_size_in_records * ds_cfg.sizeof_bts;
-
-	if (bts_size_in_bytes <= 0)
-		return -EINVAL;
-
-	bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL);
-
-	if (!bts)
-		return -ENOMEM;
-
-	ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
-
-	if (!ds) {
-		kfree((void *)bts);
-		return -ENOMEM;
-	}
-
-	set_bts_buffer_base(ds, bts);
-	set_bts_index(ds, bts);
-	set_bts_absolute_maximum(ds, bts + bts_size_in_bytes);
-	set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1);
-
-	*dsp = ds;
-	return 0;
-}
-
-int ds_free(void **dsp)
-{
-	if (*dsp) {
-		kfree((void *)get_bts_buffer_base(*dsp));
-		kfree(*dsp);
-		*dsp = NULL;
-	}
-	return 0;
-}
-
-int ds_get_bts_size(void *ds)
-{
-	int size_in_bytes;
-
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-		return -EOPNOTSUPP;
-
-	if (!ds)
+	if (context->owner[qual] == current)
 		return 0;
 
-	size_in_bytes =
-		get_bts_absolute_maximum(ds) -
-		get_bts_buffer_base(ds);
-	return size_in_bytes;
+	return -EPERM;
 }
 
-int ds_get_bts_end(void *ds)
+
+/*
+ * We either support (system-wide) per-cpu or per-thread allocation.
+ * We distinguish the two based on the task_struct pointer, where a
+ * NULL pointer indicates per-cpu allocation for the current cpu.
+ *
+ * Allocations are use-counted. As soon as resources are allocated,
+ * further allocations must be of the same type (per-cpu or
+ * per-thread). We model this by counting allocations (i.e. the number
+ * of tracers of a certain type) for one type negatively:
+ *   =0  no tracers
+ *   >0  number of per-thread tracers
+ *   <0  number of per-cpu tracers
+ *
+ * The below functions to get and put tracers and to check the
+ * allocation type require the ds_lock to be held by the caller.
+ *
+ * The tracers count essentially gives the number of ds contexts for a
+ * certain type of allocation.
+ */
+static long tracers;
+
+static inline void get_tracer(struct task_struct *task)
 {
-	int size_in_bytes = ds_get_bts_size(ds);
-
-	if (size_in_bytes <= 0)
-		return size_in_bytes;
-
-	return size_in_bytes / ds_cfg.sizeof_bts;
+	tracers += (task ? 1 : -1);
 }
 
-int ds_get_bts_index(void *ds)
+static inline void put_tracer(struct task_struct *task)
 {
-	int index_offset_in_bytes;
-
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-		return -EOPNOTSUPP;
-
-	index_offset_in_bytes =
-		get_bts_index(ds) -
-		get_bts_buffer_base(ds);
-
-	return index_offset_in_bytes / ds_cfg.sizeof_bts;
+	tracers -= (task ? 1 : -1);
 }
 
-int ds_set_overflow(void *ds, int method)
+static inline int check_tracer(struct task_struct *task)
 {
-	switch (method) {
-	case DS_O_SIGNAL:
-		return -EOPNOTSUPP;
-	case DS_O_WRAP:
-		return 0;
-	default:
-		return -EINVAL;
-	}
+	return (task ? (tracers >= 0) : (tracers <= 0));
 }
 
-int ds_get_overflow(void *ds)
+
+/*
+ * The DS context is either attached to a thread or to a cpu:
+ * - in the former case, the thread_struct contains a pointer to the
+ *   attached context.
+ * - in the latter case, we use a static array of per-cpu context
+ *   pointers.
+ *
+ * Contexts are use-counted. They are allocated on first access and
+ * deallocated when the last user puts the context.
+ *
+ * We distinguish between an allocating and a non-allocating get of a
+ * context:
+ * - the allocating get is used for requesting BTS/PEBS resources. It
+ *   requires the caller to hold the global ds_lock.
+ * - the non-allocating get is used for all other cases. A
+ *   non-existing context indicates an error. It acquires and releases
+ *   the ds_lock itself for obtaining the context.
+ *
+ * A context and its DS configuration are allocated and deallocated
+ * together. A context always has a DS configuration of the
+ * appropriate size.
+ */
+static DEFINE_PER_CPU(struct ds_context *, system_context);
+
+#define this_system_context per_cpu(system_context, smp_processor_id())
+
+/*
+ * Returns the pointer to the parameter task's context or to the
+ * system-wide context, if task is NULL.
+ *
+ * Increases the use count of the returned context, if not NULL.
+ */
+static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
-	return DS_O_WRAP;
+	struct ds_context *context;
+
+	spin_lock(&ds_lock);
+
+	context = (task ? task->thread.ds_ctx : this_system_context);
+	if (context)
+		context->count++;
+
+	spin_unlock(&ds_lock);
+
+	return context;
 }
 
-int ds_clear(void *ds)
+/*
+ * Same as ds_get_context, but allocates the context and its DS
+ * structure, if necessary; returns NULL, if out of memory.
+ *
+ * pre: requires ds_lock to be held
+ */
+static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 {
-	int bts_size = ds_get_bts_size(ds);
-	unsigned long bts_base;
+	struct ds_context **p_context =
+		(task ? &task->thread.ds_ctx : &this_system_context);
+	struct ds_context *context = *p_context;
 
-	if (bts_size <= 0)
-		return bts_size;
+	if (!context) {
+		context = kzalloc(sizeof(*context), GFP_KERNEL);
 
-	bts_base = get_bts_buffer_base(ds);
-	memset((void *)bts_base, 0, bts_size);
+		if (!context)
+			return NULL;
 
-	set_bts_index(ds, bts_base);
-	return 0;
-}
+		context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
+		if (!context->ds) {
+			kfree(context);
+			return NULL;
+		}
 
-int ds_read_bts(void *ds, int index, struct bts_struct *out)
-{
-	void *bts;
+		*p_context = context;
 
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
-		return -EOPNOTSUPP;
+		context->this = p_context;
+		context->task = task;
 
-	if (index < 0)
-		return -EINVAL;
+		if (task)
+			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-	if (index >= ds_get_bts_size(ds))
-		return -EINVAL;
+		if (!task || (task == current))
+			wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
 
-	bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts));
-
-	memset(out, 0, sizeof(*out));
-	if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) {
-		out->qualifier       = get_info_type(bts);
-		out->variant.jiffies = get_info_data(bts);
-	} else {
-		out->qualifier = BTS_BRANCH;
-		out->variant.lbr.from_ip = get_from_ip(bts);
-		out->variant.lbr.to_ip   = get_to_ip(bts);
+		get_tracer(task);
 	}
 
-	return sizeof(*out);;
+	context->count++;
+
+	return context;
 }
 
-int ds_write_bts(void *ds, const struct bts_struct *in)
+/*
+ * Decreases the use count of the parameter context, if not NULL.
+ * Deallocates the context, if the use count reaches zero.
+ */
+static inline void ds_put_context(struct ds_context *context)
 {
-	unsigned long bts;
+	if (!context)
+		return;
 
-	if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts)
+	spin_lock(&ds_lock);
+
+	if (--context->count)
+		goto out;
+
+	*(context->this) = NULL;
+
+	if (context->task)
+		clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+
+	if (!context->task || (context->task == current))
+		wrmsrl(MSR_IA32_DS_AREA, 0);
+
+	put_tracer(context->task);
+
+	/* free any leftover buffers from tracers that did not
+	 * deallocate them properly. */
+	kfree(context->buffer[ds_bts]);
+	kfree(context->buffer[ds_pebs]);
+	kfree(context->ds);
+	kfree(context);
+ out:
+	spin_unlock(&ds_lock);
+}
+
+
+/*
+ * Handle a buffer overflow
+ *
+ * task: the task whose buffers are overflowing;
+ *       NULL for a buffer overflow on the current cpu
+ * context: the ds context
+ * qual: the buffer type
+ */
+static void ds_overflow(struct task_struct *task, struct ds_context *context,
+			enum ds_qualifier qual)
+{
+	if (!context)
+		return;
+
+	if (context->callback[qual])
+		(*context->callback[qual])(task);
+
+	/* todo: do some more overflow handling */
+}
+
+
+/*
+ * Allocate a non-pageable, zeroed buffer of the parameter size and
+ * account its pages to the current task's mm (total_vm and locked_vm).
+ *
+ * The request is refused if it would exceed the RLIMIT_AS or the
+ * RLIMIT_MEMLOCK soft limit of the current task.
+ *
+ * Returns the buffer, if successful;
+ *         NULL, if out of memory or rlimit exceeded.
+ *
+ * size: the requested buffer size in bytes
+ * pages (out): if not NULL, contains the number of pages reserved
+ */
+static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
+{
+	unsigned long limit, npages;
+	void *mem;
+
+	npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	/* address space limit */
+	limit = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	if ((current->mm->total_vm + npages) > limit)
+		return NULL;
+
+	/* locked memory limit */
+	limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+	if ((current->mm->locked_vm + npages) > limit)
+		return NULL;
+
+	mem = kzalloc(size, GFP_KERNEL);
+	if (mem) {
+		current->mm->total_vm  += npages;
+		current->mm->locked_vm += npages;
+
+		if (pages)
+			*pages = npages;
+	}
+
+	return mem;
+}
+
+/*
+ * Request a BTS or PEBS buffer for a task or for the current cpu.
+ *
+ * task: passed to check_tracer() and ds_alloc_context()
+ * base: the buffer to use; if NULL, a buffer of size bytes is allocated
+ * size: the buffer size in bytes; must leave room for one record plus
+ *       the alignment adjustment
+ * ovfl: overflow callback; must be NULL (not yet implemented)
+ * qual: the buffer type
+ *
+ * Returns 0 on success; -EOPNOTSUPP, -EINVAL, -EPERM, -ENOMEM or
+ * -EALREADY on failure. On success, the context reference obtained
+ * here is kept until ds_release.
+ */
+static int ds_request(struct task_struct *task, void *base, size_t size,
+		      ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+{
+	/* NULL, so that the error paths may always put the context */
+	struct ds_context *context = NULL;
+	unsigned long buffer, adj;
+	const unsigned long alignment = (1 << 3);
+	int error = 0;
+
+	if (!ds_cfg.sizeof_ds)
 		return -EOPNOTSUPP;
 
-	if (ds_get_bts_size(ds) <= 0)
-		return -ENXIO;
-
-	bts = get_bts_index(ds);
-
-	memset((void *)bts, 0, ds_cfg.sizeof_bts);
-	switch (in->qualifier) {
-	case BTS_INVALID:
-		break;
-
-	case BTS_BRANCH:
-		set_from_ip((void *)bts, in->variant.lbr.from_ip);
-		set_to_ip((void *)bts, in->variant.lbr.to_ip);
-		break;
-
-	case BTS_TASK_ARRIVES:
-	case BTS_TASK_DEPARTS:
-		set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS);
-		set_info_type((void *)bts, in->qualifier);
-		set_info_data((void *)bts, in->variant.jiffies);
-		break;
-
-	default:
+	/* we require some space to do alignment adjustments below */
+	if (size < (alignment + ds_cfg.sizeof_rec[qual]))
 		return -EINVAL;
+
+	/* buffer overflow notification is not yet implemented */
+	if (ovfl)
+		return -EOPNOTSUPP;
+
+
+	spin_lock(&ds_lock);
+
+	/* leave via out_unlock; a plain return would keep ds_lock held */
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+
+	error = -ENOMEM;
+	context = ds_alloc_context(task);
+	if (!context)
+		goto out_unlock;
+
+	error = -EALREADY;
+	if (context->owner[qual] == current)
+		goto out_unlock;
+	error = -EPERM;
+	if (context->owner[qual] != NULL)
+		goto out_unlock;
+	context->owner[qual] = current;
+
+	spin_unlock(&ds_lock);
+
+
+	error = -ENOMEM;
+	if (!base) {
+		base = ds_allocate_buffer(size, &context->pages[qual]);
+		if (!base)
+			goto out_release;
+
+		context->buffer[qual]   = base;
+	}
+	error = 0;
+
+	context->callback[qual] = ovfl;
+
+	/* adjust the buffer address and size to meet alignment
+	 * constraints:
+	 * - buffer is double-word aligned
+	 * - size is multiple of record size
+	 *
+	 * We checked the size at the very beginning; we have enough
+	 * space to do the adjustment.
+	 */
+	buffer = (unsigned long)base;
+
+	adj = ALIGN(buffer, alignment) - buffer;
+	buffer += adj;
+	size   -= adj;
+
+	size /= ds_cfg.sizeof_rec[qual];
+	size *= ds_cfg.sizeof_rec[qual];
+
+	ds_set(context->ds, qual, ds_buffer_base, buffer);
+	ds_set(context->ds, qual, ds_index, buffer);
+	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+
+	if (ovfl) {
+		/* todo: select a suitable interrupt threshold */
+	} else
+		ds_set(context->ds, qual,
+		       ds_interrupt_threshold, buffer + size + 1);
+
+	/* we keep the context until ds_release */
+	return error;
+
+ out_release:
+	context->owner[qual] = NULL;
+	ds_put_context(context);
+	return error;
+
+ out_unlock:
+	spin_unlock(&ds_lock);
+	/* context is NULL if we failed before or in ds_alloc_context */
+	ds_put_context(context);
+	return error;
+}
+
+/*
+ * Request the BTS buffer: forwards to ds_request() with the ds_bts
+ * qualifier and returns its result.
+ */
+int ds_request_bts(struct task_struct *task, void *base, size_t size,
+		   ds_ovfl_callback_t ovfl)
+{
+	return ds_request(task, base, size, ovfl, ds_bts);
+}
+
+/*
+ * Request the PEBS buffer: forwards to ds_request() with the ds_pebs
+ * qualifier and returns its result.
+ */
+int ds_request_pebs(struct task_struct *task, void *base, size_t size,
+		    ds_ovfl_callback_t ovfl)
+{
+	return ds_request(task, base, size, ovfl, ds_pebs);
+}
+
+/*
+ * Give up the buffer of the parameter type.
+ *
+ * Frees the buffer recorded in the context (if any), undoes the page
+ * accounting on the current task's mm, and clears the owner.
+ *
+ * Returns the result of ds_validate_access().
+ */
+static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+{
+	struct ds_context *context = ds_get_context(task);
+	int status = ds_validate_access(context, qual);
+
+	if (status >= 0) {
+		kfree(context->buffer[qual]);
+		context->buffer[qual] = NULL;
+
+		current->mm->total_vm  -= context->pages[qual];
+		current->mm->locked_vm -= context->pages[qual];
+		context->pages[qual] = 0;
+		context->owner[qual] = NULL;
+
+		/* drop the reference held since the corresponding
+		 * ds_request */
+		ds_put_context(context);
+	}
+
+	/* drop the reference taken by ds_get_context above */
+	ds_put_context(context);
+	return status;
+}
+
+/* Release the BTS buffer: forwards to ds_release() with ds_bts. */
+int ds_release_bts(struct task_struct *task)
+{
+	return ds_release(task, ds_bts);
+}
+
+/* Release the PEBS buffer: forwards to ds_release() with ds_pebs. */
+int ds_release_pebs(struct task_struct *task)
+{
+	return ds_release(task, ds_pebs);
+}
+
+/*
+ * Compute the current write position of a buffer, in records.
+ *
+ * pos (out): if not NULL, receives the position
+ *
+ * Returns the position, or the (negative) result of
+ * ds_validate_access() on failure.
+ */
+static int ds_get_index(struct task_struct *task, size_t *pos,
+			enum ds_qualifier qual)
+{
+	struct ds_context *context = ds_get_context(task);
+	int result = ds_validate_access(context, qual);
+
+	if (result >= 0) {
+		unsigned long start, cursor;
+
+		start  = ds_get(context->ds, qual, ds_buffer_base);
+		cursor = ds_get(context->ds, qual, ds_index);
+
+		/* byte offset into the buffer -> record number */
+		result = ((cursor - start) / ds_cfg.sizeof_rec[qual]);
+		if (pos)
+			*pos = result;
+	}
+
+	ds_put_context(context);
+	return result;
+}
+
+/* BTS write position in records: forwards to ds_get_index(). */
+int ds_get_bts_index(struct task_struct *task, size_t *pos)
+{
+	return ds_get_index(task, pos, ds_bts);
+}
+
+/* PEBS write position in records: forwards to ds_get_index(). */
+int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+{
+	return ds_get_index(task, pos, ds_pebs);
+}
+
+/*
+ * Compute the distance between buffer base and absolute maximum,
+ * in records.
+ *
+ * pos (out): if not NULL, receives that number
+ *
+ * Returns the number of records, or the (negative) result of
+ * ds_validate_access() on failure.
+ */
+static int ds_get_end(struct task_struct *task, size_t *pos,
+		      enum ds_qualifier qual)
+{
+	struct ds_context *context;
+	unsigned long first, limit;
+	int result;
+
+	context = ds_get_context(task);
+	result = ds_validate_access(context, qual);
+	if (result < 0)
+		goto put;
+
+	first = ds_get(context->ds, qual, ds_buffer_base);
+	limit = ds_get(context->ds, qual, ds_absolute_maximum);
+
+	result = ((limit - first) / ds_cfg.sizeof_rec[qual]);
+	if (pos != NULL)
+		*pos = result;
+
+ put:
+	ds_put_context(context);
+	return result;
+}
+
+/* BTS buffer end in records: forwards to ds_get_end(). */
+int ds_get_bts_end(struct task_struct *task, size_t *pos)
+{
+	return ds_get_end(task, pos, ds_bts);
+}
+
+/* PEBS buffer end in records: forwards to ds_get_end(). */
+int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+{
+	return ds_get_end(task, pos, ds_pebs);
+}
+
+/*
+ * Provide a pointer to the record at the parameter index.
+ *
+ * index: the record number, counted from the buffer base
+ * record (out): receives the address of the record
+ *
+ * Returns the size of one record on success;
+ *         -EINVAL, if record is NULL or index is not inside the buffer;
+ *         the (negative) result of ds_validate_access() otherwise.
+ */
+static int ds_access(struct task_struct *task, size_t index,
+		     const void **record, enum ds_qualifier qual)
+{
+	struct ds_context *context;
+	unsigned long base, end;
+	int error;
+
+	if (!record)
+		return -EINVAL;
+
+	context = ds_get_context(task);
+	error = ds_validate_access(context, qual);
+	if (error < 0)
+		goto out;
+
+	base = ds_get(context->ds, qual, ds_buffer_base);
+	end  = ds_get(context->ds, qual, ds_absolute_maximum);
+
+	/* The absolute maximum is the first byte behind the last record
+	 * (see ds_get_end), so valid record numbers are strictly below
+	 * the record count. Comparing record numbers instead of
+	 * addresses also avoids a wrap-around in index * record size. */
+	error = -EINVAL;
+	if (index >= ((end - base) / ds_cfg.sizeof_rec[qual]))
+		goto out;
+
+	*record = (const void *)(base + (index * ds_cfg.sizeof_rec[qual]));
+	error = ds_cfg.sizeof_rec[qual];
+ out:
+	ds_put_context(context);
+	return error;
+}
+
+/* Access a BTS record by number: forwards to ds_access(). */
+int ds_access_bts(struct task_struct *task, size_t index, const void **record)
+{
+	return ds_access(task, index, record, ds_bts);
+}
+
+/* Access a PEBS record by number: forwards to ds_access(). */
+int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
+{
+	return ds_access(task, index, record, ds_pebs);
+}
+
+/*
+ * Copy raw record data into the buffer of the parameter type, starting
+ * at the current index and wrapping around at the absolute maximum.
+ *
+ * record: the data to write
+ * size: the number of bytes to write
+ * force: if non-zero, ds_validate_access() is skipped
+ *
+ * Returns the number of bytes written;
+ *         -EINVAL, if record is NULL;
+ *         -EPERM, if there is no context;
+ *         the (negative) result of ds_validate_access() otherwise.
+ */
+static int ds_write(struct task_struct *task, const void *record, size_t size,
+		    enum ds_qualifier qual, int force)
+{
+	struct ds_context *context;
+	int error;
+
+	if (!record)
+		return -EINVAL;
+
+	error = -EPERM;
+	context = ds_get_context(task);
+	if (!context)
+		goto out;
+
+	if (!force) {
+		error = ds_validate_access(context, qual);
+		if (error < 0)
+			goto out;
 	}
 
-	bts = bts + ds_cfg.sizeof_bts;
-	if (bts >= get_bts_absolute_maximum(ds))
-		bts = get_bts_buffer_base(ds);
-	set_bts_index(ds, bts);
+	error = 0;
+	while (size) {
+		unsigned long base, index, end, write_end, int_th;
+		unsigned long write_size, adj_write_size;
 
-	return ds_cfg.sizeof_bts;
+		/*
+		 * write as much as possible without producing an
+		 * overflow interrupt.
+		 *
+		 * interrupt_threshold must either be
+		 * - bigger than absolute_maximum or
+		 * - point to a record between buffer_base and absolute_maximum
+		 *
+		 * index points to a valid record.
+		 */
+		base   = ds_get(context->ds, qual, ds_buffer_base);
+		index  = ds_get(context->ds, qual, ds_index);
+		end    = ds_get(context->ds, qual, ds_absolute_maximum);
+		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
+
+		write_end = min(end, int_th);
+
+		/* if we are already beyond the interrupt threshold,
+		 * we fill the entire buffer */
+		if (write_end <= index)
+			write_end = end;
+
+		if (write_end <= index)
+			goto out;
+
+		write_size = min((unsigned long) size, write_end - index);
+		memcpy((void *)index, record, write_size);
+
+		record = (const char *)record + write_size;
+		size  -= write_size;
+		error += write_size;
+
+		/* Round the write up to whole records so that a trailing
+		 * partial record gets padded. Rounding down here would
+		 * make the memset length below wrap around. Never pad
+		 * beyond the end of the writable window. */
+		adj_write_size = write_size + ds_cfg.sizeof_rec[qual] - 1;
+		adj_write_size /= ds_cfg.sizeof_rec[qual];
+		adj_write_size *= ds_cfg.sizeof_rec[qual];
+		if (adj_write_size > (write_end - index))
+			adj_write_size = write_end - index;
+
+		/* zero out trailing bytes */
+		memset((char *)index + write_size, 0,
+		       adj_write_size - write_size);
+		index += adj_write_size;
+
+		/* wrap around at the end of the buffer */
+		if (index >= end)
+			index = base;
+		ds_set(context->ds, qual, ds_index, index);
+
+		if (index >= int_th)
+			ds_overflow(task, context, qual);
+	}
+
+ out:
+	ds_put_context(context);
+	return error;
 }
 
-unsigned long ds_debugctl_mask(void)
+/* Write to the BTS buffer with access validation (force = 0). */
+int ds_write_bts(struct task_struct *task, const void *record, size_t size)
 {
-	return ds_cfg.debugctl_mask;
+	return ds_write(task, record, size, ds_bts, /* force = */ 0);
 }
 
-#ifdef __i386__
-static const struct ds_configuration ds_cfg_netburst = {
-	.sizeof_ds = 9 * 4,
-	.bts_buffer_base = { 0, 4 },
-	.bts_index = { 4, 4 },
-	.bts_absolute_maximum = { 8, 4 },
-	.bts_interrupt_threshold = { 12, 4 },
-	.sizeof_bts = 3 * 4,
-	.from_ip = { 0, 4 },
-	.to_ip = { 4, 4 },
-	.info_type = { 4, 1 },
-	.info_data = { 8, 4 },
-	.debugctl_mask = (1<<2)|(1<<3)
+/* Write to the PEBS buffer with access validation (force = 0). */
+int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
+{
+	return ds_write(task, record, size, ds_pebs, /* force = */ 0);
+}
+
+/* Write to the BTS buffer, skipping access validation (force = 1). */
+int ds_unchecked_write_bts(struct task_struct *task,
+			   const void *record, size_t size)
+{
+	return ds_write(task, record, size, ds_bts, /* force = */ 1);
+}
+
+/* Write to the PEBS buffer, skipping access validation (force = 1). */
+int ds_unchecked_write_pebs(struct task_struct *task,
+			    const void *record, size_t size)
+{
+	return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+}
+
+/*
+ * Rewind the index of a buffer to the buffer base.
+ *
+ * clear: if non-zero, the buffer contents between base and absolute
+ *        maximum are zeroed before the index is rewound
+ *
+ * Returns 0 on success, or the (negative) result of
+ * ds_validate_access() on failure.
+ */
+static int ds_reset_or_clear(struct task_struct *task,
+			     enum ds_qualifier qual, int clear)
+{
+	struct ds_context *context = ds_get_context(task);
+	int status = ds_validate_access(context, qual);
+
+	if (status >= 0) {
+		unsigned long start, limit;
+
+		start = ds_get(context->ds, qual, ds_buffer_base);
+		limit = ds_get(context->ds, qual, ds_absolute_maximum);
+
+		if (clear)
+			memset((void *)start, 0, limit - start);
+
+		ds_set(context->ds, qual, ds_index, start);
+
+		status = 0;
+	}
+
+	ds_put_context(context);
+	return status;
+}
+
+/* Rewind the BTS index without clearing the buffer. */
+int ds_reset_bts(struct task_struct *task)
+{
+	return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+}
+
+/* Rewind the PEBS index without clearing the buffer. */
+int ds_reset_pebs(struct task_struct *task)
+{
+	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+}
+
+/* Zero the BTS buffer and rewind its index. */
+int ds_clear_bts(struct task_struct *task)
+{
+	return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+}
+
+/* Zero the PEBS buffer and rewind its index. */
+int ds_clear_pebs(struct task_struct *task)
+{
+	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+}
+
+/*
+ * Read the 64bit value stored behind the first eight fields of the
+ * DS area (presumably the PEBS counter reset value - confirm against
+ * the DS area layout).
+ *
+ * value (out): receives the value; must not be NULL
+ *
+ * Returns 0 on success; -EINVAL, if value is NULL; the (negative)
+ * result of ds_validate_access() otherwise.
+ */
+int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+{
+	struct ds_context *context;
+	int status;
+
+	if (value == NULL)
+		return -EINVAL;
+
+	context = ds_get_context(task);
+	status = ds_validate_access(context, ds_pebs);
+	if (status >= 0) {
+		*value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+		status = 0;
+	}
+
+	ds_put_context(context);
+	return status;
+}
+
+/*
+ * Store a 64bit value behind the first eight fields of the DS area
+ * (presumably the PEBS counter reset value - confirm against the DS
+ * area layout).
+ *
+ * Returns 0 on success, or the (negative) result of
+ * ds_validate_access() on failure.
+ */
+int ds_set_pebs_reset(struct task_struct *task, u64 value)
+{
+	struct ds_context *context = ds_get_context(task);
+	int status = ds_validate_access(context, ds_pebs);
+
+	if (status >= 0) {
+		*(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+		status = 0;
+	}
+
+	ds_put_context(context);
+	return status;
+}
+
+/*
+ * DS configuration whose field size follows sizeof(long) of the kernel:
+ * a DS area of 12 fields, BTS records of 3 fields and PEBS records of
+ * 10 fields.
+ */
+static const struct ds_configuration ds_cfg_var = {
+	.sizeof_ds    = sizeof(long) * 12,
+	.sizeof_field = sizeof(long),
+	.sizeof_rec[ds_bts]   = sizeof(long) * 3,
+	.sizeof_rec[ds_pebs]  = sizeof(long) * 10
 };
-
-static const struct ds_configuration ds_cfg_pentium_m = {
-	.sizeof_ds = 9 * 4,
-	.bts_buffer_base = { 0, 4 },
-	.bts_index = { 4, 4 },
-	.bts_absolute_maximum = { 8, 4 },
-	.bts_interrupt_threshold = { 12, 4 },
-	.sizeof_bts = 3 * 4,
-	.from_ip = { 0, 4 },
-	.to_ip = { 4, 4 },
-	.info_type = { 4, 1 },
-	.info_data = { 8, 4 },
-	.debugctl_mask = (1<<6)|(1<<7)
-};
-#endif /* _i386_ */
-
-static const struct ds_configuration ds_cfg_core2 = {
-	.sizeof_ds = 9 * 8,
-	.bts_buffer_base = { 0, 8 },
-	.bts_index = { 8, 8 },
-	.bts_absolute_maximum = { 16, 8 },
-	.bts_interrupt_threshold = { 24, 8 },
-	.sizeof_bts = 3 * 8,
-	.from_ip = { 0, 8 },
-	.to_ip = { 8, 8 },
-	.info_type = { 8, 1 },
-	.info_data = { 16, 8 },
-	.debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+/*
+ * DS configuration with a fixed field size of 8 bytes: a DS area of
+ * 12 fields, BTS records of 3 fields and PEBS records of 10 fields.
+ */
+static const struct ds_configuration ds_cfg_64 = {
+	.sizeof_ds    = 8 * 12,
+	.sizeof_field = 8,
+	.sizeof_rec[ds_bts]   = 8 * 3,
+	.sizeof_rec[ds_pebs]  = 8 * 10
 };
 
 static inline void
@@ -429,14 +821,13 @@
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
-#ifdef __i386__
 		case 0xD:
 		case 0xE: /* Pentium M */
-			ds_configure(&ds_cfg_pentium_m);
+			ds_configure(&ds_cfg_var);
 			break;
-#endif /* _i386_ */
 		case 0xF: /* Core2 */
-			ds_configure(&ds_cfg_core2);
+		case 0x1C: /* Atom */
+			ds_configure(&ds_cfg_64);
 			break;
 		default:
 			/* sorry, don't know about them */
@@ -445,13 +836,11 @@
 		break;
 	case 0xF:
 		switch (c->x86_model) {
-#ifdef __i386__
 		case 0x0:
 		case 0x1:
 		case 0x2: /* Netburst */
-			ds_configure(&ds_cfg_netburst);
+			ds_configure(&ds_cfg_var);
 			break;
-#endif /* _i386_ */
 		default:
 			/* sorry, don't know about them */
 			break;
@@ -462,3 +851,14 @@
 		break;
 	}
 }
+
+/*
+ * Drop every remaining reference to the parameter context.
+ *
+ * This is called when the task owning the parameter context is dying.
+ * There should not be any user of that context left to disturb us,
+ * anymore.
+ */
+void ds_free(struct ds_context *context)
+{
+	unsigned long refs;
+
+	/* the count is sampled once, up front; each put drops one
+	 * reference */
+	for (refs = context->count; refs != 0; refs--)
+		ds_put_context(context);
+}
+#endif /* CONFIG_X86_DS */

diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 06cc8d4..945a31c 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c

@@ -414,9 +414,11 @@
 	if (memmap.map == NULL)
 		printk(KERN_ERR "Could not map the EFI memory map!\n");
 	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
 	if (memmap.desc_size != sizeof(efi_memory_desc_t))
-		printk(KERN_WARNING "Kernel-defined memdesc"
-		       "doesn't match the one from EFI!\n");
+		printk(KERN_WARNING
+		  "Kernel-defined memdesc doesn't match the one from EFI!\n");
+
 	if (add_efi_memmap)
 		do_add_efi_memmap();
 

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 89434d4..cf3a0b2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S

@@ -275,9 +275,9 @@
 ENTRY(ret_from_fork)
 	CFI_DEFAULT_STACK
 	push kernel_eflags(%rip)
-	CFI_ADJUST_CFA_OFFSET 4
+	CFI_ADJUST_CFA_OFFSET 8
 	popf				# reset kernel eflags
-	CFI_ADJUST_CFA_OFFSET -4
+	CFI_ADJUST_CFA_OFFSET -8
 	call schedule_tail
 	GET_THREAD_INFO(%rcx)
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 9bfc4d7..d16084f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c

@@ -108,12 +108,11 @@
 	}
 	load_idt((const struct desc_ptr *)&idt_descr);
 
-	early_printk("Kernel alive\n");
+	if (console_loglevel == 10)
+		early_printk("Kernel alive\n");
 
 	x86_64_init_pda();
 
-	early_printk("Kernel really alive\n");
-
 	x86_64_start_reservations(real_mode_data);
 }
 

diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 50e5e4a..1919143 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c

@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/thread_info.h>
 #include <linux/syscalls.h>
+#include <asm/syscalls.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
 static void set_bitmap(unsigned long *bitmap, unsigned int base,

diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 3f7537b..f1c688e 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c

@@ -20,6 +20,8 @@
 
 #ifdef CONFIG_X86_32
 #include <mach_apic.h>
+#include <mach_ipi.h>
+
 /*
  * the following functions deal with sending IPIs between CPUs.
  *
@@ -147,7 +149,6 @@
 }
 
 /* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
 static int convert_apicid_to_cpu(int apic_id)
 {
 	int i;

diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1cf8c1f..b71e02d 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c

@@ -325,7 +325,7 @@
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ",
 				per_cpu(irq_stat,j).irq_call_count);
-		seq_printf(p, "  function call interrupts\n");
+		seq_printf(p, "  Function call interrupts\n");
 		seq_printf(p, "TLB: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ",

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1f78b23..f065fe9 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c

@@ -129,7 +129,7 @@
 		seq_printf(p, "CAL: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-		seq_printf(p, "  function call interrupts\n");
+		seq_printf(p, "  Function call interrupts\n");
 		seq_printf(p, "TLB: ");
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8282a21..10435a1 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c

@@ -455,12 +455,7 @@
 		return NOTIFY_DONE;
 
 	case DIE_NMI_IPI:
-		if (atomic_read(&kgdb_active) != -1) {
-			/* KGDB CPU roundup */
-			kgdb_nmicallback(raw_smp_processor_id(), regs);
-			was_in_debug_nmi[raw_smp_processor_id()] = 1;
-			touch_nmi_watchdog();
-		}
+		/* Just ignore, we will handle the roundup on DIE_NMI. */
 		return NOTIFY_DONE;
 
 	case DIE_NMIUNKNOWN:

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8b7a3cf..478bca9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c

@@ -178,7 +178,7 @@
 	kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
 }
 
-static void kvm_release_pt(u32 pfn)
+static void kvm_release_pt(unsigned long pfn)
 {
 	struct kvm_mmu_op_release_pt rpt = {
 		.header.op = KVM_MMU_OP_RELEASE_PT,

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f..0ed5f93 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c

@@ -18,6 +18,7 @@
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index abb78a2..2c97f07 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c

@@ -299,6 +299,15 @@
 		on_each_cpu(__acpi_nmi_disable, NULL, 1);
 }
 
+/*
+ * This function is called as soon as the LAPIC NMI watchdog driver has
+ * everything in place and is ready to check if the NMIs belong to the
+ * NMI watchdog. It sets the wd_enabled flag of the executing cpu.
+ */
+void cpu_nmi_set_wd_enabled(void)
+{
+	__get_cpu_var(wd_enabled) = 1;
+}
+
 void setup_apic_nmi_watchdog(void *unused)
 {
 	if (__get_cpu_var(wd_enabled))
@@ -311,8 +320,6 @@
 
 	switch (nmi_watchdog) {
 	case NMI_LOCAL_APIC:
-		 /* enable it before to avoid race with handler */
-		__get_cpu_var(wd_enabled) = 1;
 		if (lapic_watchdog_init(nmi_hz) < 0) {
 			__get_cpu_var(wd_enabled) = 0;
 			return;

diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 3e66722..7a13fac 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c

@@ -190,12 +190,12 @@
 static void __init platform_detect(void)
 {
 	size_t propsize;
-	u32 rev;
+	__be32 rev;
 
 	if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4,
 			&propsize) || propsize != 4) {
 		printk(KERN_ERR "ofw: getprop call failed!\n");
-		rev = 0;
+		rev = cpu_to_be32(0);
 	}
 	olpc_platform_info.boardrev = be32_to_cpu(rev);
 }
@@ -203,7 +203,7 @@
 static void __init platform_detect(void)
 {
 	/* stopgap until OFW support is added to the kernel */
-	olpc_platform_info.boardrev = be32_to_cpu(0xc2);
+	olpc_platform_info.boardrev = 0xc2;
 }
 #endif
 

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 300da17..e2f4376 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c

@@ -330,6 +330,7 @@
 #endif
 	.wbinvd = native_wbinvd,
 	.read_msr = native_read_msr_safe,
+	.read_msr_amd = native_read_msr_amd_safe,
 	.write_msr = native_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,

diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 5826221..9fe644f 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c

@@ -23,7 +23,7 @@
 			start = start_##ops##_##x;		\
 			end = end_##ops##_##x;			\
 			goto patch_site
-	switch(type) {
+	switch (type) {
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, restore_fl);

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 87d4d69..f704cb5 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c

@@ -82,7 +82,7 @@
 	 * using 512M as goal
 	 */
 	align = 64ULL<<20;
-	size = round_up(dma32_bootmem_size, align);
+	size = roundup(dma32_bootmem_size, align);
 	dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
 				 512ULL<<20);
 	if (dma32_bootmem_ptr)

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 49285f8..1a895a5 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c

@@ -82,7 +82,8 @@
 static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
 static int need_flush;		/* global flush state. set for each gart wrap */
 
-static unsigned long alloc_iommu(struct device *dev, int size)
+static unsigned long alloc_iommu(struct device *dev, int size,
+				 unsigned long align_mask)
 {
 	unsigned long offset, flags;
 	unsigned long boundary_size;
@@ -90,16 +91,17 @@
 
 	base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
 			   PAGE_SIZE) >> PAGE_SHIFT;
-	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+	boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1,
 			      PAGE_SIZE) >> PAGE_SHIFT;
 
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
-				  size, base_index, boundary_size, 0);
+				  size, base_index, boundary_size, align_mask);
 	if (offset == -1) {
 		need_flush = 1;
 		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
-					  size, base_index, boundary_size, 0);
+					  size, base_index, boundary_size,
+					  align_mask);
 	}
 	if (offset != -1) {
 		next_bit = offset+size;
@@ -236,10 +238,10 @@
  * Caller needs to check if the iommu is needed and flush.
  */
 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
-				size_t size, int dir)
+				size_t size, int dir, unsigned long align_mask)
 {
 	unsigned long npages = iommu_num_pages(phys_mem, size);
-	unsigned long iommu_page = alloc_iommu(dev, npages);
+	unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
 	int i;
 
 	if (iommu_page == -1) {
@@ -262,7 +264,11 @@
 static dma_addr_t
 gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
 {
-	dma_addr_t map = dma_map_area(dev, paddr, size, dir);
+	dma_addr_t map;
+	unsigned long align_mask;
+
+	align_mask = (1UL << get_order(size)) - 1;
+	map = dma_map_area(dev, paddr, size, dir, align_mask);
 
 	flush_gart();
 
@@ -281,7 +287,8 @@
 	if (!need_iommu(dev, paddr, size))
 		return paddr;
 
-	bus = gart_map_simple(dev, paddr, size, dir);
+	bus = dma_map_area(dev, paddr, size, dir, 0);
+	flush_gart();
 
 	return bus;
 }
@@ -340,7 +347,7 @@
 		unsigned long addr = sg_phys(s);
 
 		if (nonforced_iommu(dev, addr, s->length)) {
-			addr = dma_map_area(dev, addr, s->length, dir);
+			addr = dma_map_area(dev, addr, s->length, dir, 0);
 			if (addr == bad_dma_address) {
 				if (i > 0)
 					gart_unmap_sg(dev, sg, i, dir);
@@ -362,7 +369,7 @@
 			  int nelems, struct scatterlist *sout,
 			  unsigned long pages)
 {
-	unsigned long iommu_start = alloc_iommu(dev, pages);
+	unsigned long iommu_start = alloc_iommu(dev, pages, 0);
 	unsigned long iommu_page = iommu_start;
 	struct scatterlist *s;
 	int i;
@@ -626,7 +633,6 @@
 	struct pci_dev *dev;
 	void *gatt;
 	int i, error;
-	unsigned long start_pfn, end_pfn;
 
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
 	aper_size = aper_base = info->aper_size = 0;
@@ -672,12 +678,6 @@
 	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
 	       aper_base, aper_size>>10);
 
-	/* need to map that range */
-	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
-	if (end_pfn > max_low_pfn_mapped) {
-		start_pfn = (aper_base>>PAGE_SHIFT);
-		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
-	}
 	return 0;
 
  nommu:
@@ -727,7 +727,8 @@
 {
 	struct agp_kern_info info;
 	unsigned long iommu_start;
-	unsigned long aper_size;
+	unsigned long aper_base, aper_size;
+	unsigned long start_pfn, end_pfn;
 	unsigned long scratch;
 	long i;
 
@@ -765,8 +766,16 @@
 		return;
 	}
 
+	/* need to map that range */
+	aper_size = info.aper_size << 20;
+	aper_base = info.aper_base;
+	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+	if (end_pfn > max_low_pfn_mapped) {
+		start_pfn = (aper_base>>PAGE_SHIFT);
+		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+	}
+
 	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
-	aper_size = info.aper_size * 1024 * 1024;
 	iommu_size = check_iommu_size(info.aper_base, aper_size);
 	iommu_pages = iommu_size >> PAGE_SHIFT;
 

diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c
index bc1f2d3..a311ffc 100644
--- a/arch/x86/kernel/pcspeaker.c
+++ b/arch/x86/kernel/pcspeaker.c

@@ -1,20 +1,13 @@
 #include <linux/platform_device.h>
-#include <linux/errno.h>
+#include <linux/err.h>
 #include <linux/init.h>
 
+/*
+ * Register the "pcspkr" platform device.
+ *
+ * Returns 0 on success, or the error code carried by the pointer that
+ * platform_device_register_simple() returned.
+ */
 static __init int add_pcspkr(void)
 {
 	struct platform_device *pd;
-	int ret;
 
-	pd = platform_device_alloc("pcspkr", -1);
-	if (!pd)
-		return -ENOMEM;
+	pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
 
-	ret = platform_device_add(pd);
-	if (ret)
-		platform_device_put(pd);
-
-	return ret;
+	return IS_ERR(pd) ? PTR_ERR(pd) : 0;
 }
 device_initcall(add_pcspkr);

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 876e918..ec7a2ba 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c

@@ -185,7 +185,8 @@
 static void poll_idle(void)
 {
 	local_irq_enable();
-	cpu_relax();
+	while (!need_resched())
+		cpu_relax();
 }
 
 /*

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 31f40b2..205188d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c

@@ -37,6 +37,7 @@
 #include <linux/tick.h>
 #include <linux/percpu.h>
 #include <linux/prctl.h>
+#include <linux/dmi.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -56,6 +57,8 @@
 #include <asm/cpu.h>
 #include <asm/kdebug.h>
 #include <asm/idle.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -161,6 +164,7 @@
 	unsigned long d0, d1, d2, d3, d6, d7;
 	unsigned long sp;
 	unsigned short ss, gs;
+	const char *board;
 
 	if (user_mode_vm(regs)) {
 		sp = regs->sp;
@@ -173,11 +177,15 @@
 	}
 
 	printk("\n");
-	printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+
+	board = dmi_get_system_info(DMI_PRODUCT_NAME);
+	if (!board)
+		board = "";
+	printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
 			task_pid_nr(current), current->comm,
 			print_tainted(), init_utsname()->release,
 			(int)strcspn(init_utsname()->version, " "),
-			init_utsname()->version);
+			init_utsname()->version, board);
 
 	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
 			(u16)regs->cs, regs->ip, regs->flags,
@@ -277,6 +285,14 @@
 		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
 		put_cpu();
 	}
+#ifdef CONFIG_X86_DS
+	/* Free any DS contexts that have not been properly released. */
+	if (unlikely(current->thread.ds_ctx)) {
+		/* we clear debugctl to make sure DS is not used. */
+		update_debugctlmsr(0);
+		ds_free(current->thread.ds_ctx);
+	}
+#endif /* CONFIG_X86_DS */
 }
 
 void flush_thread(void)
@@ -438,6 +454,35 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_DS
+/*
+ * Switch the DS area on a context switch, if prev and next use
+ * different ones.
+ *
+ * Returns the debugctl value that is in effect afterwards: the
+ * parameter value if the DS area did not change, 0 otherwise.
+ */
+static int update_debugctl(struct thread_struct *prev,
+			struct thread_struct *next, unsigned long debugctl)
+{
+	unsigned long old_ds, new_ds;
+
+	old_ds = prev->ds_ctx ? (unsigned long)prev->ds_ctx->ds : 0;
+	new_ds = next->ds_ctx ? (unsigned long)next->ds_ctx->ds : 0;
+
+	if (new_ds == old_ds)
+		return debugctl;
+
+	/* we clear debugctl to make sure DS
+	 * is not in use when we change it */
+	update_debugctlmsr(0);
+	wrmsr(MSR_IA32_DS_AREA, new_ds, 0);
+
+	return 0;
+}
+#else
+/* Without CONFIG_X86_DS there is no DS area to switch; the debugctl
+ * value is handed back unchanged. */
+static int update_debugctl(struct thread_struct *prev,
+			struct thread_struct *next, unsigned long debugctl)
+{
+	return debugctl;
+}
+#endif /* CONFIG_X86_DS */
+
 static noinline void
 __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		 struct tss_struct *tss)
@@ -448,14 +493,7 @@
 	prev = &prev_p->thread;
 	next = &next_p->thread;
 
-	debugctl = prev->debugctlmsr;
-	if (next->ds_area_msr != prev->ds_area_msr) {
-		/* we clear debugctl to make sure DS
-		 * is not in use when we change it */
-		debugctl = 0;
-		update_debugctlmsr(0);
-		wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
-	}
+	debugctl = update_debugctl(prev, next, prev->debugctlmsr);
 
 	if (next->debugctlmsr != debugctl)
 		update_debugctlmsr(next->debugctlmsr);
@@ -479,13 +517,13 @@
 			hard_enable_TSC();
 	}
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
 	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 
 	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 
 	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e12e0e4..2a8ccb9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c

@@ -37,11 +37,11 @@
 #include <linux/kdebug.h>
 #include <linux/tick.h>
 #include <linux/prctl.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/mmu_context.h>
@@ -51,6 +51,7 @@
 #include <asm/proto.h>
 #include <asm/ia32.h>
 #include <asm/idle.h>
+#include <asm/syscalls.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -88,7 +89,7 @@
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
-#include <asm/nmi.h>
+#include <linux/nmi.h>
 /* We halt the CPU with physical CPU hotplug */
 static inline void play_dead(void)
 {
@@ -153,7 +154,7 @@
 }
 
 /* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs *regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 	unsigned long d0, d1, d2, d3, d6, d7;
@@ -162,59 +163,61 @@
 
 	printk("\n");
 	print_modules();
-	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 	printk_address(regs->ip, 1);
-	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
-		regs->flags);
-	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
+			regs->sp, regs->flags);
+	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->ax, regs->bx, regs->cx);
-	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
 	       regs->dx, regs->si, regs->di);
-	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
 	       regs->bp, regs->r8, regs->r9);
-	printk("R10: %016lx R11: %016lx R12: %016lx\n",
-	       regs->r10, regs->r11, regs->r12); 
-	printk("R13: %016lx R14: %016lx R15: %016lx\n",
-	       regs->r13, regs->r14, regs->r15); 
+	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+	       regs->r10, regs->r11, regs->r12);
+	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+	       regs->r13, regs->r14, regs->r15);
 
-	asm("movl %%ds,%0" : "=r" (ds)); 
-	asm("movl %%cs,%0" : "=r" (cs)); 
-	asm("movl %%es,%0" : "=r" (es)); 
+	asm("movl %%ds,%0" : "=r" (ds));
+	asm("movl %%cs,%0" : "=r" (cs));
+	asm("movl %%es,%0" : "=r" (es));
 	asm("movl %%fs,%0" : "=r" (fsindex));
 	asm("movl %%gs,%0" : "=r" (gsindex));
 
 	rdmsrl(MSR_FS_BASE, fs);
-	rdmsrl(MSR_GS_BASE, gs); 
-	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
+	rdmsrl(MSR_GS_BASE, gs);
+	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
 	cr4 = read_cr4();
 
-	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
-	       fs,fsindex,gs,gsindex,shadowgs); 
-	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
-	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+	       fs, fsindex, gs, gsindex, shadowgs);
+	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+			es, cr0);
+	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+			cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
 	get_debugreg(d2, 2);
-	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 	get_debugreg(d3, 3);
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
-	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 }
 
 void show_regs(struct pt_regs *regs)
 {
-	printk("CPU %d:", smp_processor_id());
+	printk(KERN_INFO "CPU %d:", smp_processor_id());
 	__show_regs(regs);
 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 }
@@ -240,6 +243,14 @@
 		t->io_bitmap_max = 0;
 		put_cpu();
 	}
+#ifdef CONFIG_X86_DS
+	/* Free any DS contexts that have not been properly released. */
+	if (unlikely(t->ds_ctx)) {
+		/* we clear debugctl to make sure DS is not used. */
+		update_debugctlmsr(0);
+		ds_free(t->ds_ctx);
+	}
+#endif /* CONFIG_X86_DS */
 }
 
 void flush_thread(void)
@@ -315,10 +326,10 @@
 
 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		unsigned long unused,
-	struct task_struct * p, struct pt_regs * regs)
+	struct task_struct *p, struct pt_regs *regs)
 {
 	int err;
-	struct pt_regs * childregs;
+	struct pt_regs *childregs;
 	struct task_struct *me = current;
 
 	childregs = ((struct pt_regs *)
@@ -363,10 +374,10 @@
 		if (test_thread_flag(TIF_IA32))
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)childregs->si, 0);
-		else 			
-#endif	 
-			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
-		if (err) 
+		else
+#endif
+			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
+		if (err)
 			goto out;
 	}
 	err = 0;
@@ -473,13 +484,27 @@
 	next = &next_p->thread;
 
 	debugctl = prev->debugctlmsr;
-	if (next->ds_area_msr != prev->ds_area_msr) {
-		/* we clear debugctl to make sure DS
-		 * is not in use when we change it */
-		debugctl = 0;
-		update_debugctlmsr(0);
-		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+
+#ifdef CONFIG_X86_DS
+	{
+		unsigned long ds_prev = 0, ds_next = 0;
+
+		if (prev->ds_ctx)
+			ds_prev = (unsigned long)prev->ds_ctx->ds;
+		if (next->ds_ctx)
+			ds_next = (unsigned long)next->ds_ctx->ds;
+
+		if (ds_next != ds_prev) {
+			/*
+			 * We clear debugctl to make sure DS
+			 * is not in use when we change it:
+			 */
+			debugctl = 0;
+			update_debugctlmsr(0);
+			wrmsrl(MSR_IA32_DS_AREA, ds_next);
+		}
 	}
+#endif /* CONFIG_X86_DS */
 
 	if (next->debugctlmsr != debugctl)
 		update_debugctlmsr(next->debugctlmsr);
@@ -517,13 +542,13 @@
 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
 	}
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
 	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
 
 	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
 		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 }
 
 /*
@@ -545,7 +570,7 @@
 	unsigned fsindex, gsindex;
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter>5)
+	if (next_p->fpu_counter > 5)
 		prefetch(next->xstate);
 
 	/*
@@ -553,13 +578,13 @@
 	 */
 	load_sp0(tss, next);
 
-	/* 
+	/*
 	 * Switch DS and ES.
 	 * This won't pick up thread selector changes, but I guess that is ok.
 	 */
 	savesegment(es, prev->es);
 	if (unlikely(next->es | prev->es))
-		loadsegment(es, next->es); 
+		loadsegment(es, next->es);
 
 	savesegment(ds, prev->ds);
 	if (unlikely(next->ds | prev->ds))
@@ -585,7 +610,7 @@
 	 */
 	arch_leave_lazy_cpu_mode();
 
-	/* 
+	/*
 	 * Switch FS and GS.
 	 *
 	 * Segment register != 0 always requires a reload.  Also
@@ -594,13 +619,13 @@
 	 */
 	if (unlikely(fsindex | next->fsindex | prev->fs)) {
 		loadsegment(fs, next->fsindex);
-		/* 
+		/*
 		 * Check if the user used a selector != 0; if yes
 		 *  clear 64bit base, since overloaded base is always
 		 *  mapped to the Null selector
 		 */
 		if (fsindex)
-			prev->fs = 0;				
+			prev->fs = 0;
 	}
 	/* when next process has a 64bit base use it */
 	if (next->fs)
@@ -610,7 +635,7 @@
 	if (unlikely(gsindex | next->gsindex | prev->gs)) {
 		load_gs_index(next->gsindex);
 		if (gsindex)
-			prev->gs = 0;				
+			prev->gs = 0;
 	}
 	if (next->gs)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
@@ -619,12 +644,12 @@
 	/* Must be after DS reload */
 	unlazy_fpu(prev_p);
 
-	/* 
+	/*
 	 * Switch the PDA and FPU contexts.
 	 */
 	prev->usersp = read_pda(oldrsp);
 	write_pda(oldrsp, next->usersp);
-	write_pda(pcurrent, next_p); 
+	write_pda(pcurrent, next_p);
 
 	write_pda(kernelstack,
 		  (unsigned long)task_stack_page(next_p) +
@@ -665,7 +690,7 @@
 		char __user * __user *envp, struct pt_regs *regs)
 {
 	long error;
-	char * filename;
+	char *filename;
 
 	filename = getname(name);
 	error = PTR_ERR(filename);
@@ -723,55 +748,55 @@
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long stack;
-	u64 fp,ip;
+	u64 fp, ip;
 	int count = 0;
 
-	if (!p || p == current || p->state==TASK_RUNNING)
-		return 0; 
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
 	stack = (unsigned long)task_stack_page(p);
 	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
 		return 0;
 	fp = *(u64 *)(p->thread.sp);
-	do { 
+	do {
 		if (fp < (unsigned long)stack ||
 		    fp > (unsigned long)stack+THREAD_SIZE)
-			return 0; 
+			return 0;
 		ip = *(u64 *)(fp+8);
 		if (!in_sched_functions(ip))
 			return ip;
-		fp = *(u64 *)fp; 
-	} while (count++ < 16); 
+		fp = *(u64 *)fp;
+	} while (count++ < 16);
 	return 0;
 }
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{ 
-	int ret = 0; 
+{
+	int ret = 0;
 	int doit = task == current;
 	int cpu;
 
-	switch (code) { 
+	switch (code) {
 	case ARCH_SET_GS:
 		if (addr >= TASK_SIZE_OF(task))
-			return -EPERM; 
+			return -EPERM;
 		cpu = get_cpu();
-		/* handle small bases via the GDT because that's faster to 
+		/* handle small bases via the GDT because that's faster to
 		   switch. */
-		if (addr <= 0xffffffff) {  
-			set_32bit_tls(task, GS_TLS, addr); 
-			if (doit) { 
+		if (addr <= 0xffffffff) {
+			set_32bit_tls(task, GS_TLS, addr);
+			if (doit) {
 				load_TLS(&task->thread, cpu);
-				load_gs_index(GS_TLS_SEL); 
+				load_gs_index(GS_TLS_SEL);
 			}
-			task->thread.gsindex = GS_TLS_SEL; 
+			task->thread.gsindex = GS_TLS_SEL;
 			task->thread.gs = 0;
-		} else { 
+		} else {
 			task->thread.gsindex = 0;
 			task->thread.gs = addr;
 			if (doit) {
 				load_gs_index(0);
 				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
-			} 
+			}
 		}
 		put_cpu();
 		break;
@@ -825,8 +850,7 @@
 				rdmsrl(MSR_KERNEL_GS_BASE, base);
 			else
 				base = task->thread.gs;
-		}
-		else
+		} else
 			base = task->thread.gs;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e37dccc..e375b65 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c

@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
+#include <linux/tracehook.h>
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/security.h>
@@ -69,7 +70,7 @@
 
 #define FLAG_MASK		FLAG_MASK_32
 
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
 	regno >>= 2;
@@ -554,45 +555,115 @@
 	return 0;
 }
 
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * The configuration for a particular BTS hardware implementation.
+ */
+struct bts_configuration {
+	/* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
+	unsigned char  sizeof_bts;
+	/* the size of a field in the BTS record in bytes */
+	unsigned char  sizeof_field;
+	/* a bitmask to enable/disable BTS in DEBUGCTL MSR */
+	unsigned long debugctl_mask;
+};
+static struct bts_configuration bts_cfg;
 
-static int ptrace_bts_get_size(struct task_struct *child)
+#define BTS_MAX_RECORD_SIZE (8 * 3)
+
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ */
+
+enum bts_field {
+	bts_from = 0,
+	bts_to,
+	bts_flags,
+
+	bts_escape = (unsigned long)-1,
+	bts_qual = bts_to,
+	bts_jiffies = bts_flags
+};
+
+static inline unsigned long bts_get(const char *base, enum bts_field field)
 {
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
-
-	return ds_get_bts_index((void *)child->thread.ds_area_msr);
+	base += (bts_cfg.sizeof_field * field);
+	return *(unsigned long *)base;
 }
 
-static int ptrace_bts_read_record(struct task_struct *child,
-				  long index,
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+{
+	base += (bts_cfg.sizeof_field * field);
+	(*(unsigned long *)base) = val;
+}
+
+/*
+ * Translate a BTS record from the raw format into the bts_struct format
+ *
+ * out (out): bts_struct interpretation
+ * raw: raw BTS record
+ */
+static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
+{
+	memset(out, 0, sizeof(*out));
+	if (bts_get(raw, bts_from) == bts_escape) {
+		out->qualifier       = bts_get(raw, bts_qual);
+		out->variant.jiffies = bts_get(raw, bts_jiffies);
+	} else {
+		out->qualifier = BTS_BRANCH;
+		out->variant.lbr.from_ip = bts_get(raw, bts_from);
+		out->variant.lbr.to_ip   = bts_get(raw, bts_to);
+	}
+}
+
+static int ptrace_bts_read_record(struct task_struct *child, size_t index,
 				  struct bts_struct __user *out)
 {
 	struct bts_struct ret;
-	int retval;
-	int bts_end;
-	int bts_index;
+	const void *bts_record;
+	size_t bts_index, bts_end;
+	int error;
 
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
+	error = ds_get_bts_end(child, &bts_end);
+	if (error < 0)
+		return error;
 
-	if (index < 0)
-		return -EINVAL;
-
-	bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr);
 	if (bts_end <= index)
 		return -EINVAL;
 
-	/* translate the ptrace bts index into the ds bts index */
-	bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr);
-	bts_index -= (index + 1);
-	if (bts_index < 0)
-		bts_index += bts_end;
+	error = ds_get_bts_index(child, &bts_index);
+	if (error < 0)
+		return error;
 
-	retval = ds_read_bts((void *)child->thread.ds_area_msr,
-			     bts_index, &ret);
-	if (retval < 0)
-		return retval;
+	/* translate the ptrace bts index into the ds bts index */
+	bts_index += bts_end - (index + 1);
+	if (bts_end <= bts_index)
+		bts_index -= bts_end;
+
+	error = ds_access_bts(child, bts_index, &bts_record);
+	if (error < 0)
+		return error;
+
+	ptrace_bts_translate_record(&ret, bts_record);
 
 	if (copy_to_user(out, &ret, sizeof(ret)))
 		return -EFAULT;
@@ -600,101 +671,106 @@
 	return sizeof(ret);
 }
 
-static int ptrace_bts_clear(struct task_struct *child)
-{
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
-
-	return ds_clear((void *)child->thread.ds_area_msr);
-}
-
 static int ptrace_bts_drain(struct task_struct *child,
 			    long size,
 			    struct bts_struct __user *out)
 {
-	int end, i;
-	void *ds = (void *)child->thread.ds_area_msr;
+	struct bts_struct ret;
+	const unsigned char *raw;
+	size_t end, i;
+	int error;
 
-	if (!ds)
-		return -ENXIO;
-
-	end = ds_get_bts_index(ds);
-	if (end <= 0)
-		return end;
+	error = ds_get_bts_index(child, &end);
+	if (error < 0)
+		return error;
 
 	if (size < (end * sizeof(struct bts_struct)))
 		return -EIO;
 
-	for (i = 0; i < end; i++, out++) {
-		struct bts_struct ret;
-		int retval;
+	error = ds_access_bts(child, 0, (const void **)&raw);
+	if (error < 0)
+		return error;
 
-		retval = ds_read_bts(ds, i, &ret);
-		if (retval < 0)
-			return retval;
+	for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) {
+		ptrace_bts_translate_record(&ret, raw);
 
 		if (copy_to_user(out, &ret, sizeof(ret)))
 			return -EFAULT;
 	}
 
-	ds_clear(ds);
+	error = ds_clear_bts(child);
+	if (error < 0)
+		return error;
 
 	return end;
 }
 
+static void ptrace_bts_ovfl(struct task_struct *child)
+{
+	send_sig(child->thread.bts_ovfl_signal, child, 0);
+}
+
 static int ptrace_bts_config(struct task_struct *child,
 			     long cfg_size,
 			     const struct ptrace_bts_config __user *ucfg)
 {
 	struct ptrace_bts_config cfg;
-	int bts_size, ret = 0;
-	void *ds;
+	int error = 0;
 
+	error = -EOPNOTSUPP;
+	if (!bts_cfg.sizeof_bts)
+		goto errout;
+
+	error = -EIO;
 	if (cfg_size < sizeof(cfg))
-		return -EIO;
+		goto errout;
 
+	error = -EFAULT;
 	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
-		return -EFAULT;
+		goto errout;
 
-	if ((int)cfg.size < 0)
-		return -EINVAL;
+	error = -EINVAL;
+	if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
+	    !(cfg.flags & PTRACE_BTS_O_ALLOC))
+		goto errout;
 
-	bts_size = 0;
-	ds = (void *)child->thread.ds_area_msr;
-	if (ds) {
-		bts_size = ds_get_bts_size(ds);
-		if (bts_size < 0)
-			return bts_size;
-	}
-	cfg.size = PAGE_ALIGN(cfg.size);
+	if (cfg.flags & PTRACE_BTS_O_ALLOC) {
+		ds_ovfl_callback_t ovfl = NULL;
+		unsigned int sig = 0;
 
-	if (bts_size != cfg.size) {
-		ret = ptrace_bts_realloc(child, cfg.size,
-					 cfg.flags & PTRACE_BTS_O_CUT_SIZE);
-		if (ret < 0)
+		/* we ignore the error in case we were not tracing child */
+		(void)ds_release_bts(child);
+
+		if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
+			if (!cfg.signal)
+				goto errout;
+
+			sig  = cfg.signal;
+			ovfl = ptrace_bts_ovfl;
+		}
+
+		error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
+		if (error < 0)
 			goto errout;
 
-		ds = (void *)child->thread.ds_area_msr;
+		child->thread.bts_ovfl_signal = sig;
 	}
 
-	if (cfg.flags & PTRACE_BTS_O_SIGNAL)
-		ret = ds_set_overflow(ds, DS_O_SIGNAL);
-	else
-		ret = ds_set_overflow(ds, DS_O_WRAP);
-	if (ret < 0)
+	error = -EINVAL;
+	if (!child->thread.ds_ctx && cfg.flags)
 		goto errout;
 
 	if (cfg.flags & PTRACE_BTS_O_TRACE)
-		child->thread.debugctlmsr |= ds_debugctl_mask();
+		child->thread.debugctlmsr |= bts_cfg.debugctl_mask;
 	else
-		child->thread.debugctlmsr &= ~ds_debugctl_mask();
+		child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
 
 	if (cfg.flags & PTRACE_BTS_O_SCHED)
 		set_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 	else
 		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 
-	ret = sizeof(cfg);
+	error = sizeof(cfg);
 
 out:
 	if (child->thread.debugctlmsr)
@@ -702,10 +778,10 @@
 	else
 		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
 
-	return ret;
+	return error;
 
 errout:
-	child->thread.debugctlmsr &= ~ds_debugctl_mask();
+	child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
 	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
 	goto out;
 }
@@ -714,119 +790,79 @@
 			     long cfg_size,
 			     struct ptrace_bts_config __user *ucfg)
 {
-	void *ds = (void *)child->thread.ds_area_msr;
 	struct ptrace_bts_config cfg;
+	size_t end;
+	const void *base, *max;
+	int error;
 
 	if (cfg_size < sizeof(cfg))
 		return -EIO;
 
+	error = ds_get_bts_end(child, &end);
+	if (error < 0)
+		return error;
+
+	error = ds_access_bts(child, /* index = */ 0, &base);
+	if (error < 0)
+		return error;
+
+	error = ds_access_bts(child, /* index = */ end, &max);
+	if (error < 0)
+		return error;
+
 	memset(&cfg, 0, sizeof(cfg));
-
-	if (ds) {
-		cfg.size = ds_get_bts_size(ds);
-
-		if (ds_get_overflow(ds) == DS_O_SIGNAL)
-			cfg.flags |= PTRACE_BTS_O_SIGNAL;
-
-		if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
-		    child->thread.debugctlmsr & ds_debugctl_mask())
-			cfg.flags |= PTRACE_BTS_O_TRACE;
-
-		if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
-			cfg.flags |= PTRACE_BTS_O_SCHED;
-	}
-
+	cfg.size = (max - base);
+	cfg.signal = child->thread.bts_ovfl_signal;
 	cfg.bts_size = sizeof(struct bts_struct);
 
+	if (cfg.signal)
+		cfg.flags |= PTRACE_BTS_O_SIGNAL;
+
+	if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) &&
+	    child->thread.debugctlmsr & bts_cfg.debugctl_mask)
+		cfg.flags |= PTRACE_BTS_O_TRACE;
+
+	if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS))
+		cfg.flags |= PTRACE_BTS_O_SCHED;
+
 	if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
 		return -EFAULT;
 
 	return sizeof(cfg);
 }
 
-
 static int ptrace_bts_write_record(struct task_struct *child,
 				   const struct bts_struct *in)
 {
-	int retval;
+	unsigned char bts_record[BTS_MAX_RECORD_SIZE];
 
-	if (!child->thread.ds_area_msr)
-		return -ENXIO;
+	BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts);
 
-	retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
-	if (retval)
-		return retval;
+	memset(bts_record, 0, bts_cfg.sizeof_bts);
+	switch (in->qualifier) {
+	case BTS_INVALID:
+		break;
 
-	return sizeof(*in);
-}
+	case BTS_BRANCH:
+		bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
+		bts_set(bts_record, bts_to,   in->variant.lbr.to_ip);
+		break;
 
-static int ptrace_bts_realloc(struct task_struct *child,
-			      int size, int reduce_size)
-{
-	unsigned long rlim, vm;
-	int ret, old_size;
+	case BTS_TASK_ARRIVES:
+	case BTS_TASK_DEPARTS:
+		bts_set(bts_record, bts_from,    bts_escape);
+		bts_set(bts_record, bts_qual,    in->qualifier);
+		bts_set(bts_record, bts_jiffies, in->variant.jiffies);
+		break;
 
-	if (size < 0)
+	default:
 		return -EINVAL;
-
-	old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
-	if (old_size < 0)
-		return old_size;
-
-	ret = ds_free((void **)&child->thread.ds_area_msr);
-	if (ret < 0)
-		goto out;
-
-	size >>= PAGE_SHIFT;
-	old_size >>= PAGE_SHIFT;
-
-	current->mm->total_vm  -= old_size;
-	current->mm->locked_vm -= old_size;
-
-	if (size == 0)
-		goto out;
-
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->total_vm  + size;
-	if (rlim < vm) {
-		ret = -ENOMEM;
-
-		if (!reduce_size)
-			goto out;
-
-		size = rlim - current->mm->total_vm;
-		if (size <= 0)
-			goto out;
 	}
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->locked_vm  + size;
-	if (rlim < vm) {
-		ret = -ENOMEM;
-
-		if (!reduce_size)
-			goto out;
-
-		size = rlim - current->mm->locked_vm;
-		if (size <= 0)
-			goto out;
-	}
-
-	ret = ds_allocate((void **)&child->thread.ds_area_msr,
-			  size << PAGE_SHIFT);
-	if (ret < 0)
-		goto out;
-
-	current->mm->total_vm  += size;
-	current->mm->locked_vm += size;
-
-out:
-	if (child->thread.ds_area_msr)
-		set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-	else
-		clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
-	return ret;
+	/* The writing task will be the switched-to task on a context
+	 * switch. It needs to write into the switched-from task's BTS
+	 * buffer. */
+	return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
 }
 
 void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -839,7 +875,66 @@
 
 	ptrace_bts_write_record(tsk, &rec);
 }
-#endif /* X86_BTS */
+
+static const struct bts_configuration bts_cfg_netburst = {
+	.sizeof_bts    = sizeof(long) * 3,
+	.sizeof_field  = sizeof(long),
+	.debugctl_mask = (1<<2)|(1<<3)|(1<<5)
+};
+
+static const struct bts_configuration bts_cfg_pentium_m = {
+	.sizeof_bts    = sizeof(long) * 3,
+	.sizeof_field  = sizeof(long),
+	.debugctl_mask = (1<<6)|(1<<7)
+};
+
+static const struct bts_configuration bts_cfg_core2 = {
+	.sizeof_bts    = 8 * 3,
+	.sizeof_field  = 8,
+	.debugctl_mask = (1<<6)|(1<<7)|(1<<9)
+};
+
+static inline void bts_configure(const struct bts_configuration *cfg)
+{
+	bts_cfg = *cfg;
+}
+
+void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
+{
+	switch (c->x86) {
+	case 0x6:
+		switch (c->x86_model) {
+		case 0xD:
+		case 0xE: /* Pentium M */
+			bts_configure(&bts_cfg_pentium_m);
+			break;
+		case 0xF: /* Core2 */
+		case 0x1C: /* Atom */
+			bts_configure(&bts_cfg_core2);
+			break;
+		default:
+			/* sorry, don't know about them */
+			break;
+		}
+		break;
+	case 0xF:
+		switch (c->x86_model) {
+		case 0x0:
+		case 0x1:
+		case 0x2: /* Netburst */
+			bts_configure(&bts_cfg_netburst);
+			break;
+		default:
+			/* sorry, don't know about them */
+			break;
+		}
+		break;
+	default:
+		/* sorry, don't know about them */
+		break;
+	}
+}
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 /*
  * Called by kernel/ptrace.c when detaching..
@@ -852,15 +947,15 @@
 #ifdef TIF_SYSCALL_EMU
 	clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-	if (child->thread.ds_area_msr) {
-#ifdef X86_BTS
-		ptrace_bts_realloc(child, 0, 0);
-#endif
-		child->thread.debugctlmsr &= ~ds_debugctl_mask();
-		if (!child->thread.debugctlmsr)
-			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-		clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
-	}
+#ifdef CONFIG_X86_PTRACE_BTS
+	(void)ds_release_bts(child);
+
+	child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+	if (!child->thread.debugctlmsr)
+		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+
+	clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+#endif /* CONFIG_X86_PTRACE_BTS */
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -980,7 +1075,7 @@
 	/*
 	 * These bits need more cooking - not enabled yet:
 	 */
-#ifdef X86_BTS
+#ifdef CONFIG_X86_PTRACE_BTS
 	case PTRACE_BTS_CONFIG:
 		ret = ptrace_bts_config
 			(child, data, (struct ptrace_bts_config __user *)addr);
@@ -992,7 +1087,7 @@
 		break;
 
 	case PTRACE_BTS_SIZE:
-		ret = ptrace_bts_get_size(child);
+		ret = ds_get_bts_index(child, /* pos = */ NULL);
 		break;
 
 	case PTRACE_BTS_GET:
@@ -1001,14 +1096,14 @@
 		break;
 
 	case PTRACE_BTS_CLEAR:
-		ret = ptrace_bts_clear(child);
+		ret = ds_clear_bts(child);
 		break;
 
 	case PTRACE_BTS_DRAIN:
 		ret = ptrace_bts_drain
 			(child, data, (struct bts_struct __user *) addr);
 		break;
-#endif
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 	default:
 		ret = ptrace_request(child, request, addr, data);
@@ -1375,30 +1470,6 @@
 	force_sig_info(SIGTRAP, &info, tsk);
 }
 
-static void syscall_trace(struct pt_regs *regs)
-{
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-
-#if 0
-	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
-	       current->comm,
-	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-	       current_thread_info()->flags, current->ptrace);
-#endif
-
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
 
 #ifdef CONFIG_X86_32
 # define IS_IA32	1
@@ -1432,8 +1503,9 @@
 	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
 		ret = -1L;
 
-	if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
-		syscall_trace(regs);
+	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+	    tracehook_report_syscall_entry(regs))
+		ret = -1L;
 
 	if (unlikely(current->audit_context)) {
 		if (IS_IA32)
@@ -1459,7 +1531,7 @@
 		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
-		syscall_trace(regs);
+		tracehook_report_syscall_exit(regs, 0);
 
 	/*
 	 * If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1475,6 +1547,6 @@
 	 * system call instruction.
 	 */
 	if (test_thread_flag(TIF_SINGLESTEP) &&
-	    (current->ptrace & PT_PTRACED))
+	    tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
 		send_sigtrap(current, regs, 0);
 }

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 724adfc..f4c93f1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c

@@ -29,7 +29,11 @@
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-enum reboot_type reboot_type = BOOT_KBD;
+/*
+ * Keyboard reset and triple fault may result in INIT, not RESET, which
+ * doesn't work when we're in vmx root mode.  Try ACPI first.
+ */
+enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9838f25..141efab 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c

@@ -223,6 +223,9 @@
 #define RAMDISK_LOAD_FLAG		0x4000
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
+#ifdef CONFIG_CMDLINE_BOOL
+static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+#endif
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
@@ -665,6 +668,19 @@
 	bss_resource.start = virt_to_phys(&__bss_start);
 	bss_resource.end = virt_to_phys(&__bss_stop)-1;
 
+#ifdef CONFIG_CMDLINE_BOOL
+#ifdef CONFIG_CMDLINE_OVERRIDE
+	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+	if (builtin_cmdline[0]) {
+		/* append boot loader cmdline to builtin */
+		strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+		strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+		strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+	}
+#endif
+#endif
+
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 76e305e..0e67f72 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c

@@ -162,9 +162,16 @@
 			printk(KERN_INFO
 			       "cpu %d has no node %d or node-local memory\n",
 				cpu, node);
+			if (ptr)
+				printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
+					 cpu, __pa(ptr));
 		}
-		else
+		else {
 			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+			if (ptr)
+				printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
+					 cpu, node, __pa(ptr));
+		}
 #endif
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);

diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 72bbb51..8b4956e 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h

@@ -24,4 +24,9 @@
 	struct ucontext uc;
 	struct siginfo info;
 };
+
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+		sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		sigset_t *set, struct pt_regs *regs);
 #endif

diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6fb5bcd..2a2435d 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c

@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/tracehook.h>
 #include <linux/elf.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -26,6 +27,7 @@
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+#include <asm/syscalls.h>
 
 #include "sigframe.h"
 
@@ -558,8 +560,6 @@
 	 * handler too.
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
 
 	spin_lock_irq(&current->sighand->siglock);
 	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
@@ -568,6 +568,9 @@
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
+	tracehook_signal_handler(sig, info, ka, regs,
+				 test_thread_flag(TIF_SINGLESTEP));
+
 	return 0;
 }
 
@@ -661,5 +664,10 @@
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
 
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
+
 	clear_thread_flag(TIF_IRET);
 }

diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ca316b5..694aa88 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c

@@ -15,17 +15,21 @@
 #include <linux/errno.h>
 #include <linux/wait.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/unistd.h>
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/compiler.h>
+#include <linux/uaccess.h>
+
 #include <asm/processor.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
 #include <asm/mce.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
 #include "sigframe.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -41,11 +45,6 @@
 # define FIX_EFLAGS	__FIX_EFLAGS
 #endif
 
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-               sigset_t *set, struct pt_regs * regs); 
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-            sigset_t *set, struct pt_regs * regs); 
-
 asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
@@ -128,7 +127,7 @@
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+#define COPY(x)		(err |= __get_user(regs->x, &sc->x))
 
 	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
 	COPY(dx); COPY(cx); COPY(ip);
@@ -158,7 +157,7 @@
 	}
 
 	{
-		struct _fpstate __user * buf;
+		struct _fpstate __user *buf;
 		err |= __get_user(buf, &sc->fpstate);
 
 		if (buf) {
@@ -198,7 +197,7 @@
 	current->blocked = set;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
-	
+
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
@@ -208,16 +207,17 @@
 	return ax;
 
 badframe:
-	signal_fault(regs,frame,"sigreturn");
+	signal_fault(regs, frame, "sigreturn");
 	return 0;
-}	
+}
 
 /*
  * Set up a signal frame.
  */
 
 static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+		unsigned long mask, struct task_struct *me)
 {
 	int err = 0;
 
@@ -273,35 +273,35 @@
 }
 
 static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			   sigset_t *set, struct pt_regs * regs)
+			   sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
-	struct _fpstate __user *fp = NULL; 
+	struct _fpstate __user *fp = NULL;
 	int err = 0;
 	struct task_struct *me = current;
 
 	if (used_math()) {
-		fp = get_stack(ka, regs, sizeof(struct _fpstate)); 
+		fp = get_stack(ka, regs, sizeof(struct _fpstate));
 		frame = (void __user *)round_down(
 			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
 
 		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
 			goto give_sigsegv;
 
-		if (save_i387(fp) < 0) 
-			err |= -1; 
+		if (save_i387(fp) < 0)
+			err |= -1;
 	} else
 		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto give_sigsegv;
 
-	if (ka->sa.sa_flags & SA_SIGINFO) { 
+	if (ka->sa.sa_flags & SA_SIGINFO) {
 		err |= copy_siginfo_to_user(&frame->info, info);
 		if (err)
 			goto give_sigsegv;
 	}
-		
+
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
@@ -311,9 +311,9 @@
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
 	err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
-	if (sizeof(*set) == 16) { 
+	if (sizeof(*set) == 16) {
 		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
-		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); 
+		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
 	} else
 		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
@@ -324,7 +324,7 @@
 		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
 	} else {
 		/* could use a vstub here */
-		goto give_sigsegv; 
+		goto give_sigsegv;
 	}
 
 	if (err)
@@ -332,7 +332,7 @@
 
 	/* Set up registers for signal handler */
 	regs->di = sig;
-	/* In case the signal handler was declared without prototypes */ 
+	/* In case the signal handler was declared without prototypes */
 	regs->ax = 0;
 
 	/* This also works for non SA_SIGINFO handlers because they expect the
@@ -355,37 +355,8 @@
 }
 
 /*
- * Return -1L or the syscall number that @regs is executing.
- */
-static long current_syscall(struct pt_regs *regs)
-{
-	/*
-	 * We always sign-extend a -1 value being set here,
-	 * so this is always either -1L or a syscall number.
-	 */
-	return regs->orig_ax;
-}
-
-/*
- * Return a value that is -EFOO if the system call in @regs->orig_ax
- * returned an error.  This only works for @regs from @current.
- */
-static long current_syscall_ret(struct pt_regs *regs)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32))
-		/*
-		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
-		 * and will match correctly in comparisons.
-		 */
-		return (int) regs->ax;
-#endif
-	return regs->ax;
-}
-
-/*
  * OK, we're invoking a handler
- */	
+ */
 
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -394,9 +365,9 @@
 	int ret;
 
 	/* Are we from a system call? */
-	if (current_syscall(regs) >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
-		switch (current_syscall_ret(regs)) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTARTNOHAND:
 			regs->ax = -EINTR;
@@ -429,7 +400,7 @@
 			ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
 		else
 			ret = ia32_setup_frame(sig, ka, oldset, regs);
-	} else 
+	} else
 #endif
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
@@ -453,15 +424,16 @@
 		 * handler too.
 		 */
 		regs->flags &= ~X86_EFLAGS_TF;
-		if (test_thread_flag(TIF_SINGLESTEP))
-			ptrace_notify(SIGTRAP);
 
 		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
 		if (!(ka->sa.sa_flags & SA_NODEFER))
-			sigaddset(&current->blocked,sig);
+			sigaddset(&current->blocked, sig);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
+
+		tracehook_signal_handler(sig, info, ka, regs,
+					 test_thread_flag(TIF_SINGLESTEP));
 	}
 
 	return ret;
@@ -518,9 +490,9 @@
 	}
 
 	/* Did we come from a system call? */
-	if (current_syscall(regs) >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* Restart the system call - no handlers present */
-		switch (current_syscall_ret(regs)) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
@@ -558,17 +530,23 @@
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{ 
-	struct task_struct *me = current; 
+{
+	struct task_struct *me = current;
 	if (show_unhandled_signals && printk_ratelimit()) {
 		printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
-	       me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+	       me->comm, me->pid, where, frame, regs->ip,
+		   regs->sp, regs->orig_ax);
 		print_vma_addr(" in ", regs->ip);
 		printk("\n");
 	}
 
-	force_sig(SIGSEGV, me); 
-} 
+	force_sig(SIGSEGV, me);
+}

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b..45531e3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c

@@ -88,7 +88,7 @@
 #define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
 #define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
 #else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)      (idle_thread_array[(x)])
 #define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
 #endif
@@ -129,7 +129,7 @@
 static cpumask_t cpu_sibling_setup_map;
 
 /* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
 
@@ -1313,16 +1313,13 @@
 	if (!num_processors)
 		num_processors = 1;
 
-#ifdef CONFIG_HOTPLUG_CPU
 	if (additional_cpus == -1) {
 		if (disabled_cpus > 0)
 			additional_cpus = disabled_cpus;
 		else
 			additional_cpus = 0;
 	}
-#else
-	additional_cpus = 0;
-#endif
+
 	possible = num_processors + additional_cpus;
 	if (possible > NR_CPUS)
 		possible = NR_CPUS;

diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7066cb8..1884a8d 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c

@@ -22,6 +22,8 @@
 #include <linux/uaccess.h>
 #include <linux/unistd.h>
 
+#include <asm/syscalls.h>
+
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)

diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 3b360ef..6bc211a 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c

@@ -13,15 +13,17 @@
 #include <linux/utsname.h>
 #include <linux/personality.h>
 #include <linux/random.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/ia32.h>
+#include <asm/syscalls.h>
 
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long off)
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+		unsigned long prot, unsigned long flags,
+		unsigned long fd, unsigned long off)
 {
 	long error;
-	struct file * file;
+	struct file *file;
 
 	error = -EINVAL;
 	if (off & ~PAGE_MASK)
@@ -56,9 +58,9 @@
 		   unmapped base down for this case. This can give
 		   conflicts with the heap, but we assume that glibc
 		   malloc knows how to fall back to mmap. Give it 1GB
-		   of playground for now. -AK */ 
-		*begin = 0x40000000; 
-		*end = 0x80000000;		
+		   of playground for now. -AK */
+		*begin = 0x40000000;
+		*end = 0x80000000;
 		if (current->flags & PF_RANDOMIZE) {
 			new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
 			if (new_begin)
@@ -66,9 +68,9 @@
 		}
 	} else {
 		*begin = TASK_UNMAPPED_BASE;
-		*end = TASK_SIZE; 
+		*end = TASK_SIZE;
 	}
-} 
+}
 
 unsigned long
 arch_get_unmapped_area(struct file *filp, unsigned long addr,
@@ -78,11 +80,11 @@
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
 	unsigned long begin, end;
-	
+
 	if (flags & MAP_FIXED)
 		return addr;
 
-	find_start_end(flags, &begin, &end); 
+	find_start_end(flags, &begin, &end);
 
 	if (len > end)
 		return -ENOMEM;
@@ -96,12 +98,12 @@
 	}
 	if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
 	    && len <= mm->cached_hole_size) {
-	        mm->cached_hole_size = 0;
+		mm->cached_hole_size = 0;
 		mm->free_area_cache = begin;
 	}
 	addr = mm->free_area_cache;
-	if (addr < begin) 
-		addr = begin; 
+	if (addr < begin)
+		addr = begin;
 	start_addr = addr;
 
 full_search:
@@ -127,7 +129,7 @@
 			return addr;
 		}
 		if (addr + mm->cached_hole_size < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
+			mm->cached_hole_size = vma->vm_start - addr;
 
 		addr = vma->vm_end;
 	}
@@ -177,7 +179,7 @@
 		vma = find_vma(mm, addr-len);
 		if (!vma || addr <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr-len);
+			return mm->free_area_cache = addr-len;
 	}
 
 	if (mm->mmap_base < len)
@@ -194,7 +196,7 @@
 		vma = find_vma(mm, addr);
 		if (!vma || addr+len <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr);
+			return mm->free_area_cache = addr;
 
 		/* remember the largest hole we saw so far */
 		if (addr + mm->cached_hole_size < vma->vm_start)
@@ -224,13 +226,13 @@
 }
 
 
-asmlinkage long sys_uname(struct new_utsname __user * name)
+asmlinkage long sys_uname(struct new_utsname __user *name)
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof(*name));
 	up_read(&uts_sem);
-	if (personality(current->personality) == PER_LINUX32) 
-		err |= copy_to_user(&name->machine, "i686", 5); 		
+	if (personality(current->personality) == PER_LINUX32)
+		err |= copy_to_user(&name->machine, "i686", 5);
 	return err ? -EFAULT : 0;
 }

diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c..3d1be4f 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c

@@ -8,12 +8,12 @@
 #define __NO_STUBS
 
 #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #include <asm/unistd_64.h>
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 
 typedef void (*sys_call_ptr_t)(void);
 

diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index ffe3c66..bbecf8b 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c

@@ -36,6 +36,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/time.h>
+#include <asm/timer.h>
 
 #include "do_timer.h"
 

diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index ab6bf37..6bb7b85 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c

@@ -10,6 +10,7 @@
 #include <asm/ldt.h>
 #include <asm/processor.h>
 #include <asm/proto.h>
+#include <asm/syscalls.h>
 
 #include "tls.h"
 

diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 513caac..7a31f10 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c

@@ -32,6 +32,8 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/io.h>
 
 #if defined(CONFIG_EDAC)
 #include <linux/edac.h>
@@ -45,9 +47,6 @@
 #include <asm/unwind.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/io.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #include <asm/pda.h>
@@ -85,7 +84,8 @@
 
 void printk_address(unsigned long address, int reliable)
 {
-	printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
+	printk(" [<%016lx>] %s%pS\n",
+			address, reliable ?	"" : "? ", (void *) address);
 }
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -98,7 +98,8 @@
 		[STACKFAULT_STACK - 1] = "#SS",
 		[MCE_STACK - 1] = "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
-		[N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+		[N_EXCEPTION_STACKS ...
+			N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
 #endif
 	};
 	unsigned k;
@@ -163,7 +164,7 @@
 }
 
 /*
- * x86-64 can have up to three kernel stacks: 
+ * x86-64 can have up to three kernel stacks:
  * process stack
  * interrupt stack
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -219,7 +220,7 @@
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
-	unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
+	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
 	unsigned used = 0;
 	struct thread_info *tinfo;
 
@@ -237,7 +238,7 @@
 	if (!bp) {
 		if (task == current) {
 			/* Grab bp right from our regs */
-			asm("movq %%rbp, %0" : "=r" (bp) :);
+			asm("movq %%rbp, %0" : "=r" (bp) : );
 		} else {
 			/* bp is the last reg pushed by switch_to */
 			bp = *(unsigned long *) task->thread.sp;
@@ -339,9 +340,8 @@
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp, char *log_lvl)
 {
-	printk("\nCall Trace:\n");
+	printk("Call Trace:\n");
 	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-	printk("\n");
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
@@ -357,11 +357,15 @@
 	unsigned long *stack;
 	int i;
 	const int cpu = smp_processor_id();
-	unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
-	unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+	unsigned long *irqstack_end =
+		(unsigned long *) (cpu_pda(cpu)->irqstackptr);
+	unsigned long *irqstack =
+		(unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
 
-	// debugging aid: "show_stack(NULL, NULL);" prints the
-	// back trace for this cpu.
+	/*
+	 * debugging aid: "show_stack(NULL, NULL);" prints the
+	 * back trace for this cpu.
+	 */
 
 	if (sp == NULL) {
 		if (task)
@@ -386,6 +390,7 @@
 		printk(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
+	printk("\n");
 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
 }
 
@@ -404,7 +409,7 @@
 
 #ifdef CONFIG_FRAME_POINTER
 	if (!bp)
-		asm("movq %%rbp, %0" : "=r" (bp):);
+		asm("movq %%rbp, %0" : "=r" (bp) : );
 #endif
 
 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
@@ -414,7 +419,6 @@
 		init_utsname()->version);
 	show_trace(NULL, NULL, &stack, bp);
 }
-
 EXPORT_SYMBOL(dump_stack);
 
 void show_registers(struct pt_regs *regs)
@@ -443,7 +447,6 @@
 		printk("Stack: ");
 		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
 				regs->bp, "");
-		printk("\n");
 
 		printk(KERN_EMERG "Code: ");
 
@@ -493,7 +496,7 @@
 	raw_local_irq_save(flags);
 	cpu = smp_processor_id();
 	if (!__raw_spin_trylock(&die_lock)) {
-		if (cpu == die_owner) 
+		if (cpu == die_owner)
 			/* nested oops. should stop eventually */;
 		else
 			__raw_spin_lock(&die_lock);
@@ -638,7 +641,7 @@
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
+asmlinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
 							== NOTIFY_STOP)	\
@@ -648,7 +651,7 @@
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
-asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
+asmlinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
 	siginfo_t info;							\
 	info.si_signo = signr;						\
@@ -683,7 +686,7 @@
 	preempt_conditional_cli(regs);
 }
 
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
+asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
@@ -778,9 +781,10 @@
 }
 
 static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
-	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
+			NOTIFY_STOP)
 		return;
 	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
 		reason);
@@ -882,7 +886,7 @@
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
 	/* Exception from kernel and interrupts are enabled. Move to
- 	   kernel process stack. */
+	   kernel process stack. */
 	else if (eregs->flags & X86_EFLAGS_IF)
 		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
@@ -891,7 +895,7 @@
 }
 
 /* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
+asmlinkage void __kprobes do_debug(struct pt_regs *regs,
 				   unsigned long error_code)
 {
 	struct task_struct *tsk = current;
@@ -1035,7 +1039,7 @@
 
 asmlinkage void bad_intr(void)
 {
-	printk("bad interrupt"); 
+	printk("bad interrupt");
 }
 
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
@@ -1047,7 +1051,7 @@
 
 	conditional_sti(regs);
 	if (!user_mode(regs) &&
-        	kernel_math_error(regs, "kernel simd math error", 19))
+			kernel_math_error(regs, "kernel simd math error", 19))
 		return;
 
 	/*
@@ -1092,7 +1096,7 @@
 	force_sig_info(SIGFPE, &info, task);
 }
 
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
 {
 }
 
@@ -1149,8 +1153,10 @@
 	set_intr_gate(0, &divide_error);
 	set_intr_gate_ist(1, &debug, DEBUG_STACK);
 	set_intr_gate_ist(2, &nmi, NMI_STACK);
- 	set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
-	set_system_gate(4, &overflow); /* int4 can be called from all */
+	/* int3 can be called from all */
+	set_system_gate_ist(3, &int3, DEBUG_STACK);
+	/* int4 can be called from all */
+	set_system_gate(4, &overflow);
 	set_intr_gate(5, &bounds);
 	set_intr_gate(6, &invalid_op);
 	set_intr_gate(7, &device_not_available);

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8f98e9d..161bb85 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c

@@ -104,7 +104,7 @@
 /*
  * Read TSC and the reference counters. Take care of SMI disturbance
  */
-static u64 tsc_read_refs(u64 *pm, u64 *hpet)
+static u64 tsc_read_refs(u64 *p, int hpet)
 {
 	u64 t1, t2;
 	int i;
@@ -112,9 +112,9 @@
 	for (i = 0; i < MAX_RETRIES; i++) {
 		t1 = get_cycles();
 		if (hpet)
-			*hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
+			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
 		else
-			*pm = acpi_pm_read_early();
+			*p = acpi_pm_read_early();
 		t2 = get_cycles();
 		if ((t2 - t1) < SMI_TRESHOLD)
 			return t2;
@@ -123,13 +123,59 @@
 }
 
 /*
+ * Calculate the TSC frequency from HPET reference
+ */
+static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
+{
+	u64 ref_time;
+
+	/*
+	 * The HPET samples are masked to 32 bits when they are read,
+	 * so a smaller second sample means the counter wrapped once.
+	 */
+	if (hpet1 > hpet2)
+		hpet2 += 0x100000000ULL;
+
+	/*
+	 * Elapsed reference time: tick count times the HPET period,
+	 * scaled down by 10^6.
+	 * NOTE(review): presumably the period is in femtoseconds, which
+	 * would make ref_time nanoseconds - confirm against the HPET code.
+	 */
+	ref_time = (hpet2 - hpet1) * hpet_readl(HPET_PERIOD);
+	do_div(ref_time, 1000000);
+
+	/* deltatsc arrives pre-scaled by the caller; divide in place */
+	do_div(deltatsc, ref_time);
+
+	return (unsigned long) deltatsc;
+}
+
+/*
+ * Derive the TSC frequency from two ACPI PM timer samples.
+ *
+ * @deltatsc: TSC delta between the samples, pre-scaled by the caller
+ * @pm1:      PM timer value at the first sample
+ * @pm2:      PM timer value at the second sample
+ *
+ * Returns deltatsc divided by the elapsed reference time in
+ * nanoseconds, or ULONG_MAX when both samples are zero.
+ */
+static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
+{
+	u64 ref_ns;
+
+	/* Two zero samples are not a usable reference */
+	if ((pm1 | pm2) == 0)
+		return ULONG_MAX;
+
+	/* A smaller second sample means the counter overran in between */
+	if (pm1 > pm2)
+		pm2 += (u64)ACPI_PM_OVRRUN;
+
+	/* ticks -> nanoseconds */
+	ref_ns = (pm2 - pm1) * 1000000000LL;
+	do_div(ref_ns, PMTMR_TICKS_PER_SEC);
+
+	do_div(deltatsc, ref_ns);
+
+	return (unsigned long) deltatsc;
+}
+
+#define CAL_MS		10
+#define CAL_LATCH	(CLOCK_TICK_RATE / (1000 / CAL_MS))
+#define CAL_PIT_LOOPS	1000
+
+#define CAL2_MS		50
+#define CAL2_LATCH	(CLOCK_TICK_RATE / (1000 / CAL2_MS))
+#define CAL2_PIT_LOOPS	5000
+
+
+/*
  * Try to calibrate the TSC against the Programmable
  * Interrupt Timer and return the frequency of the TSC
  * in kHz.
  *
  * Return ULONG_MAX on failure to calibrate.
  */
-static unsigned long pit_calibrate_tsc(void)
+static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
 {
 	u64 tsc, t1, t2, delta;
 	unsigned long tscmin, tscmax;
@@ -144,8 +190,8 @@
 	 * (LSB then MSB) to begin countdown.
 	 */
 	outb(0xb0, 0x43);
-	outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
-	outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
+	outb(latch & 0xff, 0x42);
+	outb(latch >> 8, 0x42);
 
 	tsc = t1 = t2 = get_cycles();
 
@@ -166,31 +212,154 @@
 	/*
 	 * Sanity checks:
 	 *
-	 * If we were not able to read the PIT more than 5000
+	 * If we were not able to read the PIT more than loopmin
 	 * times, then we have been hit by a massive SMI
 	 *
 	 * If the maximum is 10 times larger than the minimum,
 	 * then we got hit by an SMI as well.
 	 */
-	if (pitcnt < 5000 || tscmax > 10 * tscmin)
+	if (pitcnt < loopmin || tscmax > 10 * tscmin)
 		return ULONG_MAX;
 
 	/* Calculate the PIT value */
 	delta = t2 - t1;
-	do_div(delta, 50);
+	do_div(delta, ms);
 	return delta;
 }
 
+/*
+ * This reads the current MSB of the PIT counter, and
+ * checks if we are running on sufficiently fast and
+ * non-virtualized hardware.
+ *
+ * Our expectations are:
+ *
+ *  - the PIT is running at roughly 1.19MHz
+ *
+ *  - each IO is going to take about 1us on real hardware,
+ *    but we allow it to be much faster (by a factor of 10) or
+ *    _slightly_ slower (ie we allow up to a 2us read+counter
+ *    update) - anything else implies an unacceptably slow CPU
+ *    or PIT for the fast calibration to work.
+ *
+ *  - with 256 PIT ticks to read the value, we have 214us to
+ *    see the same MSB (and overhead like doing a single TSC
+ *    read per MSB value etc).
+ *
+ *  - We're doing 2 reads per loop (LSB, MSB), and we expect
+ *    them each to take about a microsecond on real hardware.
+ *    So we expect a count value of around 100. But we'll be
+ *    generous, and accept anything over 50.
+ *
+ *  - if the PIT is stuck, and we see *many* more reads, we
+ *    return early (and the next caller of pit_expect_msb()
+ *    will then consider it a failure when it doesn't see the
+ *    next expected value).
+ *
+ * These expectations mean that we know that we have seen the
+ * transition from one expected value to another with a fairly
+ * high accuracy, and we didn't miss any events. We can thus
+ * use the TSC value at the transitions to calculate a pretty
+ * good value for the TSC frequency.
+ *
+ * Returns non-zero when the MSB was read back as @val more than
+ * 50 times in a row before it changed (or before the read cap
+ * below was reached), and zero otherwise.
+ */
+static inline int pit_expect_msb(unsigned char val)
+{
+	int count = 0;
+
+	/* Cap the number of reads so a stuck PIT cannot hang us here */
+	for (count = 0; count < 50000; count++) {
+		/* Ignore LSB */
+		inb(0x42);
+		/* Second read is the MSB; stop once it moves off @val */
+		if (inb(0x42) != val)
+			break;
+	}
+	return count > 50;
+}
+
+/*
+ * How many MSB values do we want to see? We aim for a
+ * 15ms calibration, which assuming a 2us counter read
+ * error should give us roughly 150 ppm precision for
+ * the calibration.
+ */
+#define QUICK_PIT_MS 15
+#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
+
+/*
+ * Fast TSC calibration against PIT counter 2.
+ *
+ * Watches the MSB of the counter step down QUICK_PIT_ITERATIONS
+ * times and timestamps the first and the last transition with
+ * the TSC.
+ *
+ * Returns the TSC frequency in kHz, or 0 when any of the
+ * pit_expect_msb() checks fails.
+ */
+static unsigned long quick_pit_calibrate(void)
+{
+	/* Set the Gate high, disable speaker */
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+	/*
+	 * Counter 2, mode 0 (one-shot), binary count
+	 *
+	 * NOTE! Mode 2 decrements by two (and then the
+	 * output is flipped each time, giving the same
+	 * final output frequency as a decrement-by-one),
+	 * so mode 0 is much better when looking at the
+	 * individual counts.
+	 */
+	outb(0xb0, 0x43);
+
+	/* Start at 0xffff */
+	outb(0xff, 0x42);
+	outb(0xff, 0x42);
+
+	if (pit_expect_msb(0xff)) {
+		int i;
+		u64 t1, t2, delta;
+		unsigned char expect = 0xfe;
+
+		t1 = get_cycles();
+		for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) {
+			if (!pit_expect_msb(expect))
+				goto failed;
+		}
+		t2 = get_cycles();
+
+		/*
+		 * Make sure we can rely on the second TSC timestamp:
+		 */
+		if (!pit_expect_msb(expect))
+			goto failed;
+
+		/*
+		 * Ok, if we get here, then we've seen the
+		 * MSB of the PIT decrement QUICK_PIT_ITERATIONS
+		 * times, and each MSB had many hits, so we never
+		 * had any sudden jumps.
+		 *
+		 * As a result, we can depend on there not being
+		 * any odd delays anywhere, and the TSC reads are
+		 * reliable.
+		 *
+		 * kHz = ticks / time-in-seconds / 1000;
+		 * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
+		 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
+		 */
+		delta = (t2 - t1)*PIT_TICK_RATE;
+		do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
+		/* Informational only: give it an explicit log level */
+		printk(KERN_INFO "Fast TSC calibration using PIT\n");
+		return delta;
+	}
+failed:
+	return 0;
+}
 
 /**
  * native_calibrate_tsc - calibrate the tsc on boot
  */
 unsigned long native_calibrate_tsc(void)
 {
-	u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2;
+	u64 tsc1, tsc2, delta, ref1, ref2;
 	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
-	unsigned long flags;
-	int hpet = is_hpet_enabled(), i;
+	unsigned long flags, latch, ms, fast_calibrate;
+	int hpet = is_hpet_enabled(), i, loopmin;
+
+	local_irq_save(flags);
+	fast_calibrate = quick_pit_calibrate();
+	local_irq_restore(flags);
+	if (fast_calibrate)
+		return fast_calibrate;
 
 	/*
 	 * Run 5 calibration loops to get the lowest frequency value
@@ -216,7 +385,13 @@
 	 * calibration delay loop as we have to wait for a certain
 	 * amount of time anyway.
 	 */
-	for (i = 0; i < 5; i++) {
+
+	/* Preset PIT loop values */
+	latch = CAL_LATCH;
+	ms = CAL_MS;
+	loopmin = CAL_PIT_LOOPS;
+
+	for (i = 0; i < 3; i++) {
 		unsigned long tsc_pit_khz;
 
 		/*
@@ -226,16 +401,16 @@
 		 * read the end value.
 		 */
 		local_irq_save(flags);
-		tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
-		tsc_pit_khz = pit_calibrate_tsc();
-		tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+		tsc1 = tsc_read_refs(&ref1, hpet);
+		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
+		tsc2 = tsc_read_refs(&ref2, hpet);
 		local_irq_restore(flags);
 
 		/* Pick the lowest PIT TSC calibration so far */
 		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
 
 		/* hpet or pmtimer available ? */
-		if (!hpet && !pm1 && !pm2)
+		if (!hpet && !ref1 && !ref2)
 			continue;
 
 		/* Check, whether the sampling was disturbed by an SMI */
@@ -243,23 +418,41 @@
 			continue;
 
 		tsc2 = (tsc2 - tsc1) * 1000000LL;
+		if (hpet)
+			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
+		else
+			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
 
-		if (hpet) {
-			if (hpet2 < hpet1)
-				hpet2 += 0x100000000ULL;
-			hpet2 -= hpet1;
-			tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
-			do_div(tsc1, 1000000);
-		} else {
-			if (pm2 < pm1)
-				pm2 += (u64)ACPI_PM_OVRRUN;
-			pm2 -= pm1;
-			tsc1 = pm2 * 1000000000LL;
-			do_div(tsc1, PMTMR_TICKS_PER_SEC);
+		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+
+		/* Check the reference deviation */
+		delta = ((u64) tsc_pit_min) * 100;
+		do_div(delta, tsc_ref_min);
+
+		/*
+		 * If both calibration results are inside a 10% window
+		 * then we can be sure, that the calibration
+		 * succeeded. We break out of the loop right away. We
+		 * use the reference value, as it is more precise.
+		 */
+		if (delta >= 90 && delta <= 110) {
+			printk(KERN_INFO
+			       "TSC: PIT calibration matches %s. %d loops\n",
+			       hpet ? "HPET" : "PMTIMER", i + 1);
+			return tsc_ref_min;
 		}
 
-		do_div(tsc2, tsc1);
-		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+		/*
+		 * Check whether PIT failed more than once. This
+		 * happens in virtualized environments. We need to
+		 * give the virtual PC a slightly longer timeframe for
+		 * the HPET/PMTIMER to make the result precise.
+		 */
+		if (i == 1 && tsc_pit_min == ULONG_MAX) {
+			latch = CAL2_LATCH;
+			ms = CAL2_MS;
+			loopmin = CAL2_PIT_LOOPS;
+		}
 	}
 
 	/*
@@ -270,7 +463,7 @@
 		printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n");
 
 		/* We don't have an alternative source, disable TSC */
-		if (!hpet && !pm1 && !pm2) {
+		if (!hpet && !ref1 && !ref2) {
 			printk("TSC: No reference (HPET/PMTIMER) available\n");
 			return 0;
 		}
@@ -278,7 +471,7 @@
 		/* The alternative source failed as well, disable TSC */
 		if (tsc_ref_min == ULONG_MAX) {
 			printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
-			       "failed due to SMI disturbance.\n");
+			       "failed.\n");
 			return 0;
 		}
 
@@ -290,44 +483,25 @@
 	}
 
 	/* We don't have an alternative source, use the PIT calibration value */
-	if (!hpet && !pm1 && !pm2) {
+	if (!hpet && !ref1 && !ref2) {
 		printk(KERN_INFO "TSC: Using PIT calibration value\n");
 		return tsc_pit_min;
 	}
 
 	/* The alternative source failed, use the PIT calibration value */
 	if (tsc_ref_min == ULONG_MAX) {
-		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due "
-		       "to SMI disturbance. Using PIT calibration\n");
+		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. "
+		       "Using PIT calibration\n");
 		return tsc_pit_min;
 	}
 
-	/* Check the reference deviation */
-	delta = ((u64) tsc_pit_min) * 100;
-	do_div(delta, tsc_ref_min);
-
-	/*
-	 * If both calibration results are inside a 5% window, the we
-	 * use the lower frequency of those as it is probably the
-	 * closest estimate.
-	 */
-	if (delta >= 95 && delta <= 105) {
-		printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n",
-		       hpet ? "HPET" : "PMTIMER");
-		printk(KERN_INFO "TSC: using %s calibration value\n",
-		       tsc_pit_min <= tsc_ref_min ? "PIT" :
-		       hpet ? "HPET" : "PMTIMER");
-		return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min;
-	}
-
-	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
-	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
-
 	/*
 	 * The calibration values differ too much. In doubt, we use
 	 * the PIT value as we know that there are PMTIMERs around
-	 * running at double speed.
+	 * running at double speed. At least we let the user know:
 	 */
+	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
+	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
 	printk(KERN_INFO "TSC: Using PIT calibration value\n");
 	return tsc_pit_min;
 }

diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 594ef47..61a97e6 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c

@@ -25,45 +25,31 @@
 #include <asm/visws/cobalt.h>
 #include <asm/visws/piix4.h>
 #include <asm/arch_hooks.h>
+#include <asm/io_apic.h>
 #include <asm/fixmap.h>
 #include <asm/reboot.h>
 #include <asm/setup.h>
 #include <asm/e820.h>
-#include <asm/smp.h>
 #include <asm/io.h>
 
 #include <mach_ipi.h>
 
 #include "mach_apic.h"
 
-#include <linux/init.h>
-#include <linux/smp.h>
-
 #include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
 
-#include <asm/io.h>
-#include <asm/apic.h>
 #include <asm/i8259.h>
 #include <asm/irq_vectors.h>
-#include <asm/visws/cobalt.h>
 #include <asm/visws/lithium.h>
-#include <asm/visws/piix4.h>
 
 #include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 
 extern int no_broadcast;
 
-#include <asm/io.h>
 #include <asm/apic.h>
-#include <asm/arch_hooks.h>
-#include <asm/visws/cobalt.h>
-#include <asm/visws/lithium.h>
 
 char visws_board_type	= -1;
 char visws_board_rev	= -1;

diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 38f566f..4eeb5cf 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c

@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/tlbflush.h>
 #include <asm/irq.h>
+#include <asm/syscalls.h>
 
 /*
  * Known problems:

diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index edfb09f..8c9ad02 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c

@@ -393,13 +393,13 @@
 }
 #endif
 
-static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn)
 {
 	vmi_set_page_type(pfn, VMI_PAGE_L1);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
 }
 
-static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn)
 {
  	/*
 	 * This call comes in very early, before mem_map is setup.
@@ -410,20 +410,20 @@
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
 }
 
-static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count)
 {
  	vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
 	vmi_check_page_type(clonepfn, VMI_PAGE_L2);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
 }
 
-static void vmi_release_pte(u32 pfn)
+static void vmi_release_pte(unsigned long pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L1);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
 }
 
-static void vmi_release_pmd(u32 pfn)
+static void vmi_release_pmd(unsigned long pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L2);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);

diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index 01b868b..321cf72 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c

@@ -16,37 +16,46 @@
 	rdmsr(rv->msr_no, rv->l, rv->h);
 }
 
-static void __rdmsr_safe_on_cpu(void *info)
+static void __wrmsr_on_cpu(void *info)
 {
 	struct msr_info *rv = info;
 
-	rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
+	wrmsr(rv->msr_no, rv->l, rv->h);
 }
 
-static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
-	int err = 0;
+	int err;
 	struct msr_info rv;
 
 	rv.msr_no = msr_no;
-	if (safe) {
-		err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu,
-					       &rv, 1);
-		err = err ? err : rv.err;
-	} else {
-		err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
-	}
+	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
 	*l = rv.l;
 	*h = rv.h;
 
 	return err;
 }
 
-static void __wrmsr_on_cpu(void *info)
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	rv.l = l;
+	rv.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+	return err;
+}
+
+/* These "safe" variants are slower and should be used when the target MSR
+   may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
 {
 	struct msr_info *rv = info;
 
-	wrmsr(rv->msr_no, rv->l, rv->h);
+	rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
 }
 
 static void __wrmsr_safe_on_cpu(void *info)
@@ -56,45 +65,30 @@
 	rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h);
 }
 
-static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
-	int err = 0;
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+	*l = rv.l;
+	*h = rv.h;
+
+	return err ? err : rv.err;
+}
+
+int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
 	struct msr_info rv;
 
 	rv.msr_no = msr_no;
 	rv.l = l;
 	rv.h = h;
-	if (safe) {
-		err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu,
-					       &rv, 1);
-		err = err ? err : rv.err;
-	} else {
-		err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-	}
+	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
 
-	return err;
-}
-
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	return _wrmsr_on_cpu(cpu, msr_no, l, h, 0);
-}
-
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	return _rdmsr_on_cpu(cpu, msr_no, l, h, 0);
-}
-
-/* These "safe" variants are slower and should be used when the target MSR
-   may not actually exist. */
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	return _wrmsr_on_cpu(cpu, msr_no, l, h, 1);
-}
-
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	return _rdmsr_on_cpu(cpu, msr_no, l, h, 1);
+	return err ? err : rv.err;
 }
 
 EXPORT_SYMBOL(rdmsr_on_cpu);

diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 94972e7..82004d2 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c

@@ -22,7 +22,7 @@
 		"testb %%al,%%al\n\t"
 		"jne 1b"
 		: "=&S" (d0), "=&D" (d1), "=&a" (d2)
-		:"0" (src), "1" (dest) : "memory");
+		: "0" (src), "1" (dest) : "memory");
 	return dest;
 }
 EXPORT_SYMBOL(strcpy);
@@ -42,7 +42,7 @@
 		"stosb\n"
 		"2:"
 		: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
-		:"0" (src), "1" (dest), "2" (count) : "memory");
+		: "0" (src), "1" (dest), "2" (count) : "memory");
 	return dest;
 }
 EXPORT_SYMBOL(strncpy);
@@ -60,7 +60,7 @@
 		"testb %%al,%%al\n\t"
 		"jne 1b"
 		: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-		: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory");
+		: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory");
 	return dest;
 }
 EXPORT_SYMBOL(strcat);
@@ -105,9 +105,9 @@
 		"2:\tsbbl %%eax,%%eax\n\t"
 		"orb $1,%%al\n"
 		"3:"
-		:"=a" (res), "=&S" (d0), "=&D" (d1)
-		:"1" (cs), "2" (ct)
-		:"memory");
+		: "=a" (res), "=&S" (d0), "=&D" (d1)
+		: "1" (cs), "2" (ct)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strcmp);
@@ -130,9 +130,9 @@
 		"3:\tsbbl %%eax,%%eax\n\t"
 		"orb $1,%%al\n"
 		"4:"
-		:"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
-		:"1" (cs), "2" (ct), "3" (count)
-		:"memory");
+		: "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+		: "1" (cs), "2" (ct), "3" (count)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strncmp);
@@ -152,9 +152,9 @@
 		"movl $1,%1\n"
 		"2:\tmovl %1,%0\n\t"
 		"decl %0"
-		:"=a" (res), "=&S" (d0)
-		:"1" (s), "0" (c)
-		:"memory");
+		: "=a" (res), "=&S" (d0)
+		: "1" (s), "0" (c)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strchr);
@@ -169,9 +169,9 @@
 		"scasb\n\t"
 		"notl %0\n\t"
 		"decl %0"
-		:"=c" (res), "=&D" (d0)
-		:"1" (s), "a" (0), "0" (0xffffffffu)
-		:"memory");
+		: "=c" (res), "=&D" (d0)
+		: "1" (s), "a" (0), "0" (0xffffffffu)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strlen);
@@ -189,9 +189,9 @@
 		"je 1f\n\t"
 		"movl $1,%0\n"
 		"1:\tdecl %0"
-		:"=D" (res), "=&c" (d0)
-		:"a" (c), "0" (cs), "1" (count)
-		:"memory");
+		: "=D" (res), "=&c" (d0)
+		: "a" (c), "0" (cs), "1" (count)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(memchr);
@@ -228,9 +228,9 @@
 		"cmpl $-1,%1\n\t"
 		"jne 1b\n"
 		"3:\tsubl %2,%0"
-		:"=a" (res), "=&d" (d0)
-		:"c" (s), "1" (count)
-		:"memory");
+		: "=a" (res), "=&d" (d0)
+		: "c" (s), "1" (count)
+		: "memory");
 	return res;
 }
 EXPORT_SYMBOL(strnlen);

diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c
index 42e8a50..8e2d55f 100644
--- a/arch/x86/lib/strstr_32.c
+++ b/arch/x86/lib/strstr_32.c

@@ -23,9 +23,9 @@
 	"jne 1b\n\t"
 	"xorl %%eax,%%eax\n\t"
 	"2:"
-	:"=a" (__res), "=&c" (d0), "=&S" (d1)
-	:"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
-	:"dx", "di");
+	: "=a" (__res), "=&c" (d0), "=&S" (d1)
+	: "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
+	: "dx", "di");
 return __res;
 }
 

diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3d31783..3f2cf11 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c

@@ -10,13 +10,15 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
+#include <mach_ipi.h>
+
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI	(1)
 #else
 #define DEFAULT_SEND_IPI	(0)
 #endif
 
-int no_broadcast=DEFAULT_SEND_IPI;
+int no_broadcast = DEFAULT_SEND_IPI;
 
 /**
  * pre_intr_init_hook - initialisation prior to setting up interrupt vectors

diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 62fa440..847c164 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c

@@ -328,7 +328,7 @@
 
 	get_memcfg_numa();
 
-	kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
+	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
 	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
 	do {

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index a20d1fa..e7277cb 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c

@@ -148,8 +148,8 @@
 	 * we have now. "break" is either changing perms, levels or
 	 * address space marker.
 	 */
-	prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
-	cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);
+	prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
+	cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
 
 	if (!st->level) {
 		/* First entry */

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe..8f92cac 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c

@@ -35,6 +35,7 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm-generic/sections.h>
+#include <asm/traps.h>
 
 /*
  * Page fault error code bits
@@ -357,8 +358,6 @@
 	return 0;
 }
 
-void do_invalid_op(struct pt_regs *, unsigned long);
-
 static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_F00F_BUG

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 60ec1d0..6b9a935 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c

@@ -47,6 +47,7 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/smp.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index d3746ef..770536e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c

@@ -225,7 +225,7 @@
 void __init cleanup_highmap(void)
 {
 	unsigned long vaddr = __START_KERNEL_map;
-	unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
+	unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
 	pmd_t *pmd = level2_kernel_pgt;
 	pmd_t *last_pmd = pmd + PTRS_PER_PMD;
 
@@ -451,14 +451,14 @@
 	unsigned long puds, pmds, ptes, tables, start;
 
 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
-	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
+	tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
 	if (direct_gbpages) {
 		unsigned long extra;
 		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
 		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
 	} else
 		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-	tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+	tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
 
 	if (cpu_has_pse) {
 		unsigned long extra;
@@ -466,7 +466,7 @@
 		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	} else
 		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
+	tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
 
 	/*
 	 * RED-PEN putting page tables only on node 0 could

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4b6e6a..cac6da5 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c

@@ -421,7 +421,7 @@
 	return;
 }
 
-int __initdata early_ioremap_debug;
+static int __initdata early_ioremap_debug;
 
 static int __init early_ioremap_debug_setup(char *str)
 {
@@ -547,7 +547,7 @@
 }
 
 
-int __initdata early_ioremap_nested;
+static int __initdata early_ioremap_nested;
 
 static int __init check_early_ioremap_leak(void)
 {

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a4dd793..cebcbf1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c

@@ -79,7 +79,7 @@
 		return 0;
 
 	addr = 0x8000;
-	nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
+	nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
 	nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT,
 				      nodemap_size, L1_CACHE_BYTES);
 	if (nodemap_addr == -1UL) {
@@ -176,10 +176,10 @@
 	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
-	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	int nid;
 
-	start = round_up(start, ZONE_ALIGN);
+	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
 	       start, end);
@@ -210,9 +210,9 @@
 	bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
 	nid = phys_to_nid(nodedata_phys);
 	if (nid == nodeid)
-		bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+		bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
 	else
-		bootmap_start = round_up(start, PAGE_SIZE);
+		bootmap_start = roundup(start, PAGE_SIZE);
 	/*
 	 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
 	 * to use that to align to PAGE_SIZE

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 43e2f84..898fad6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c

@@ -84,7 +84,7 @@
 
 static inline unsigned long highmap_end_pfn(void)
 {
-	return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+	return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
 }
 
 #endif
@@ -906,11 +906,13 @@
 {
 	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
 }
+EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
 }
+EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d503027..86f2ffc 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c

@@ -63,10 +63,8 @@
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
-static void pgd_ctor(void *p)
+static void pgd_ctor(pgd_t *pgd)
 {
-	pgd_t *pgd = p;
-
 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
 	   references from swapper_pg_dir. */
@@ -87,7 +85,7 @@
 		pgd_list_add(pgd);
 }
 
-static void pgd_dtor(void *pgd)
+static void pgd_dtor(pgd_t *pgd)
 {
 	unsigned long flags; /* can be called from interrupt context */
 

diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cab0abb..0951db9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c

@@ -123,7 +123,8 @@
 	if (!arg)
 		return -EINVAL;
 
-	__VMALLOC_RESERVE = memparse(arg, &arg);
+	/* Add VMALLOC_OFFSET to the parsed value due to vm area guard hole*/
+	__VMALLOC_RESERVE = memparse(arg, &arg) + VMALLOC_OFFSET;
 	return 0;
 }
 early_param("vmalloc", parse_vmalloc);

diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 56b4757..43ac5af 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c

@@ -10,11 +10,12 @@
 
 #include <linux/oprofile.h>
 #include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/nmi.h>
 #include <asm/msr.h>
-#include <asm/ptrace.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
-#include <asm/nmi.h>
+
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -40,7 +41,7 @@
 static inline void setup_num_counters(void)
 {
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2){
+	if (smp_num_siblings == 2) {
 		num_counters = NUM_COUNTERS_HT2;
 		num_controls = NUM_CONTROLS_HT2;
 	}
@@ -86,7 +87,7 @@
 #define CTR_FLAME_2    (1 << 6)
 #define CTR_IQ_5       (1 << 7)
 
-static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
 	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
 	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
 	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
@@ -97,32 +98,32 @@
 	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
 };
 
-#define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
+#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
 
 /* p4 event codes in libop/op_event.h are indices into this table. */
 
 static struct p4_event_binding p4_events[NUM_EVENTS] = {
-	
+
 	{ /* BRANCH_RETIRED */
-		0x05, 0x06, 
+		0x05, 0x06,
 		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
-	
+
 	{ /* MISPRED_BRANCH_RETIRED */
-		0x04, 0x03, 
+		0x04, 0x03,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
-	
+
 	{ /* TC_DELIVER_MODE */
 		0x01, 0x01,
-		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
+		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
 		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
 	},
-	
+
 	{ /* BPU_FETCH_REQUEST */
-		0x00, 0x03, 
+		0x00, 0x03,
 		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
 		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
 	},
@@ -146,7 +147,7 @@
 	},
 
 	{ /* LOAD_PORT_REPLAY */
-		0x02, 0x04, 
+		0x02, 0x04,
 		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
 	},
@@ -170,43 +171,43 @@
 	},
 
 	{ /* BSQ_CACHE_REFERENCE */
-		0x07, 0x0c, 
+		0x07, 0x0c,
 		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
 		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
 	},
 
 	{ /* IOQ_ALLOCATION */
-		0x06, 0x03, 
+		0x06, 0x03,
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { 0, 0 } }
 	},
 
 	{ /* IOQ_ACTIVE_ENTRIES */
-		0x06, 0x1a, 
+		0x06, 0x1a,
 		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
 		  { 0, 0 } }
 	},
 
 	{ /* FSB_DATA_ACTIVITY */
-		0x06, 0x17, 
+		0x06, 0x17,
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
 	},
 
 	{ /* BSQ_ALLOCATION */
-		0x07, 0x05, 
+		0x07, 0x05,
 		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
 		  { 0, 0 } }
 	},
 
 	{ /* BSQ_ACTIVE_ENTRIES */
 		0x07, 0x06,
-		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
 		  { 0, 0 } }
 	},
 
 	{ /* X87_ASSIST */
-		0x05, 0x03, 
+		0x05, 0x03,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
@@ -216,21 +217,21 @@
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* PACKED_SP_UOP */
-		0x01, 0x08, 
+		0x01, 0x08,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* PACKED_DP_UOP */
-		0x01, 0x0c, 
+		0x01, 0x0c,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
 
 	{ /* SCALAR_SP_UOP */
-		0x01, 0x0a, 
+		0x01, 0x0a,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
@@ -242,31 +243,31 @@
 	},
 
 	{ /* 64BIT_MMX_UOP */
-		0x01, 0x02, 
+		0x01, 0x02,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* 128BIT_MMX_UOP */
-		0x01, 0x1a, 
+		0x01, 0x1a,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
 
 	{ /* X87_FP_UOP */
-		0x01, 0x04, 
+		0x01, 0x04,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* X87_SIMD_MOVES_UOP */
-		0x01, 0x2e, 
+		0x01, 0x2e,
 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
 	},
-  
+
 	{ /* MACHINE_CLEAR */
-		0x05, 0x02, 
+		0x05, 0x02,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
 	},
@@ -276,9 +277,9 @@
 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
 		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
 	},
-  
+
 	{ /* TC_MS_XFER */
-		0x00, 0x05, 
+		0x00, 0x05,
 		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
 		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
 	},
@@ -308,7 +309,7 @@
 	},
 
 	{ /* INSTR_RETIRED */
-		0x04, 0x02, 
+		0x04, 0x02,
 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
@@ -319,14 +320,14 @@
 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
 	},
 
-	{ /* UOP_TYPE */    
-		0x02, 0x02, 
+	{ /* UOP_TYPE */
+		0x02, 0x02,
 		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
 		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
 	},
 
 	{ /* RETIRED_MISPRED_BRANCH_TYPE */
-		0x02, 0x05, 
+		0x02, 0x05,
 		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
 		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
 	},
@@ -349,8 +350,8 @@
 #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
 #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
 #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
-#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
+#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
 
 #define CCCR_RESERVED_BITS 0x38030FFF
 #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -360,15 +361,15 @@
 #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
-#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
+#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
 
 
@@ -380,7 +381,7 @@
 #ifdef CONFIG_SMP
 	int cpu = smp_processor_id();
 	return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
-#endif	
+#endif
 	return 0;
 }
 
@@ -395,25 +396,23 @@
 
 static void p4_fill_in_addresses(struct op_msrs * const msrs)
 {
-	unsigned int i; 
+	unsigned int i;
 	unsigned int addr, cccraddr, stag;
 
 	setup_num_counters();
 	stag = get_stagger();
 
 	/* initialize some registers */
-	for (i = 0; i < num_counters; ++i) {
+	for (i = 0; i < num_counters; ++i)
 		msrs->counters[i].addr = 0;
-	}
-	for (i = 0; i < num_controls; ++i) {
+	for (i = 0; i < num_controls; ++i)
 		msrs->controls[i].addr = 0;
-	}
-	
+
 	/* the counter & cccr registers we pay attention to */
 	for (i = 0; i < num_counters; ++i) {
 		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
 		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
-		if (reserve_perfctr_nmi(addr)){
+		if (reserve_perfctr_nmi(addr)) {
 			msrs->counters[i].addr = addr;
 			msrs->controls[i].addr = cccraddr;
 		}
@@ -447,22 +446,22 @@
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
-	
+
 	for (addr = MSR_P4_MS_ESCR0 + stag;
-	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
-	
+
 	for (addr = MSR_P4_IX_ESCR0 + stag;
-	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
 		if (reserve_evntsel_nmi(addr))
 			msrs->controls[i].addr = addr;
 	}
 
 	/* there are 2 remaining non-contiguously located ESCRs */
 
-	if (num_counters == NUM_COUNTERS_NON_HT) {		
+	if (num_counters == NUM_COUNTERS_NON_HT) {
 		/* standard non-HT CPUs handle both remaining ESCRs*/
 		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
 			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
@@ -498,20 +497,20 @@
 	unsigned int stag;
 
 	stag = get_stagger();
-	
+
 	/* convert from counter *number* to counter *bit* */
 	counter_bit = 1 << VIRT_CTR(stag, ctr);
-	
+
 	/* find our event binding structure. */
 	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
-		printk(KERN_ERR 
-		       "oprofile: P4 event code 0x%lx out of range\n", 
+		printk(KERN_ERR
+		       "oprofile: P4 event code 0x%lx out of range\n",
 		       counter_config[ctr].event);
 		return;
 	}
-	
+
 	ev = &(p4_events[counter_config[ctr].event - 1]);
-	
+
 	for (i = 0; i < maxbind; i++) {
 		if (ev->bindings[i].virt_counter & counter_bit) {
 
@@ -526,25 +525,24 @@
 				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
 			}
 			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
-			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);			
+			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
 			ESCR_WRITE(escr, high, ev, i);
-		       
+
 			/* modify CCCR */
 			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
 			CCCR_CLEAR(cccr);
 			CCCR_SET_REQUIRED_BITS(cccr);
 			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
-			if (stag == 0) {
+			if (stag == 0)
 				CCCR_SET_PMI_OVF_0(cccr);
-			} else {
+			else
 				CCCR_SET_PMI_OVF_1(cccr);
-			}
 			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
 			return;
 		}
 	}
 
-	printk(KERN_ERR 
+	printk(KERN_ERR
 	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
 	       counter_config[ctr].event, stag, ctr);
 }
@@ -559,14 +557,14 @@
 	stag = get_stagger();
 
 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
-	if (! MISC_PMC_ENABLED_P(low)) {
+	if (!MISC_PMC_ENABLED_P(low)) {
 		printk(KERN_ERR "oprofile: P4 PMC not available\n");
 		return;
 	}
 
 	/* clear the cccrs we will use */
 	for (i = 0 ; i < num_counters ; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_CLEAR(low);
@@ -576,14 +574,14 @@
 
 	/* clear all escrs (including those outside our concern) */
 	for (i = num_counters; i < num_controls; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
 			continue;
 		wrmsr(msrs->controls[i].addr, 0, 0);
 	}
 
 	/* setup all counters */
 	for (i = 0 ; i < num_counters ; ++i) {
-		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
+		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
 			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -603,11 +601,11 @@
 	stag = get_stagger();
 
 	for (i = 0; i < num_counters; ++i) {
-		
-		if (!reset_value[i]) 
+
+		if (!reset_value[i])
 			continue;
 
-		/* 
+		/*
 		 * there is some eccentricity in the hardware which
 		 * requires that we perform 2 extra corrections:
 		 *
@@ -616,24 +614,24 @@
 		 *
 		 * - write the counter back twice to ensure it gets
 		 *   updated properly.
-		 * 
+		 *
 		 * the former seems to be related to extra NMIs happening
 		 * during the current NMI; the latter is reported as errata
 		 * N15 in intel doc 249199-029, pentium 4 specification
 		 * update, though their suggested work-around does not
 		 * appear to solve the problem.
 		 */
-		
+
 		real = VIRT_CTR(stag, i);
 
 		CCCR_READ(low, high, real);
- 		CTR_READ(ctr, high, real);
+		CTR_READ(ctr, high, real);
 		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
 			oprofile_add_sample(regs, i);
- 			CTR_WRITE(reset_value[i], real);
+			CTR_WRITE(reset_value[i], real);
 			CCCR_CLEAR_OVF(low);
 			CCCR_WRITE(low, high, real);
- 			CTR_WRITE(reset_value[i], real);
+			CTR_WRITE(reset_value[i], real);
 		}
 	}
 
@@ -683,15 +681,16 @@
 	int i;
 
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTR_IS_RESERVED(msrs,i))
+		if (CTR_IS_RESERVED(msrs, i))
 			release_perfctr_nmi(msrs->counters[i].addr);
 	}
-	/* some of the control registers are specially reserved in
+	/*
+	 * some of the control registers are specially reserved in
 	 * conjunction with the counter registers (hence the starting offset).
 	 * This saves a few bits.
 	 */
 	for (i = num_counters ; i < num_controls ; ++i) {
-		if (CTRL_IS_RESERVED(msrs,i))
+		if (CTRL_IS_RESERVED(msrs, i))
 			release_evntsel_nmi(msrs->controls[i].addr);
 	}
 }

diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 6a0fca7..22e0576 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c

@@ -580,7 +580,7 @@
 				    unsigned long action, void *hcpu)
 {
 	int cpu = (long)hcpu;
-	switch(action) {
+	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0);

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 8e07718..006599d 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c

@@ -1043,35 +1043,44 @@
 		if (io_apic_assign_pci_irqs) {
 			int irq;
 
-			if (pin) {
-				/*
-				 * interrupt pins are numbered starting
-				 * from 1
-				 */
-				pin--;
-				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-					PCI_SLOT(dev->devfn), pin);
-	/*
-	 * Busses behind bridges are typically not listed in the MP-table.
-	 * In this case we have to look up the IRQ based on the parent bus,
-	 * parent slot, and pin number. The SMP code detects such bridged
-	 * busses itself so we should get into this branch reliably.
-	 */
-				if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
-					struct pci_dev *bridge = dev->bus->self;
+			if (!pin)
+				continue;
 
-					pin = (pin + PCI_SLOT(dev->devfn)) % 4;
-					irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
-							PCI_SLOT(bridge->devfn), pin);
-					if (irq >= 0)
-						dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
-							 pci_name(bridge),
-							 'A' + pin, irq);
-				}
-				if (irq >= 0) {
-					dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
-					dev->irq = irq;
-				}
+			/*
+			 * interrupt pins are numbered starting from 1
+			 */
+			pin--;
+			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+				PCI_SLOT(dev->devfn), pin);
+			/*
+			 * Busses behind bridges are typically not listed in the
+			 * MP-table.  In this case we have to look up the IRQ
+			 * based on the parent bus, parent slot, and pin number.
+			 * The SMP code detects such bridged busses itself so we
+			 * should get into this branch reliably.
+			 */
+			if (irq < 0 && dev->bus->parent) {
+				/* go back to the bridge */
+				struct pci_dev *bridge = dev->bus->self;
+				int bus;
+
+				pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+				bus = bridge->bus->number;
+				irq = IO_APIC_get_PCI_irq_vector(bus,
+						PCI_SLOT(bridge->devfn), pin);
+				if (irq >= 0)
+					dev_warn(&dev->dev,
+						"using bridge %s INT %c to "
+							"get IRQ %d\n",
+						 pci_name(bridge),
+						 'A' + pin, irq);
+			}
+			if (irq >= 0) {
+				dev_info(&dev->dev,
+					"PCI->APIC IRQ transform: INT %c "
+						"-> IRQ %d\n",
+					'A' + pin, irq);
+				dev->irq = irq;
 			}
 		}
 #endif

diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 4fc7e87..d1e9b53 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S

@@ -1,5 +1,3 @@
-.text
-
 /*
  * This may not use any stack, nor any variable that is not "NoSave":
  *
@@ -12,17 +10,18 @@
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
-	.text
+.text
 
 ENTRY(swsusp_arch_suspend)
-
 	movl %esp, saved_context_esp
 	movl %ebx, saved_context_ebx
 	movl %ebp, saved_context_ebp
 	movl %esi, saved_context_esi
 	movl %edi, saved_context_edi
-	pushfl ; popl saved_context_eflags
+	pushfl
+	popl saved_context_eflags
 
 	call swsusp_save
 	ret
@@ -59,7 +58,7 @@
 	movl	mmu_cr4_features, %ecx
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
 	movl	%ecx, %edx
-	andl	$~(1<<7), %edx;  # PGE
+	andl	$~(X86_CR4_PGE), %edx
 	movl	%edx, %cr4;  # turn off PGE
 1:
 	movl	%cr3, %eax;  # flush TLB
@@ -74,7 +73,8 @@
 	movl saved_context_esi, %esi
 	movl saved_context_edi, %edi
 
-	pushl saved_context_eflags ; popfl
+	pushl saved_context_eflags
+	popfl
 
 	xorl	%eax, %eax
 

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a4e201b..7dcd321 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c

@@ -812,7 +812,7 @@
 
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
+static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 {
 #ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
@@ -822,7 +822,7 @@
 
 /* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(u32 pfn)
+static void xen_release_pte_init(unsigned long pfn)
 {
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
@@ -838,7 +838,7 @@
 
 /* This needs to make sure the new pte page is pinned iff its being
    attached to a pinned pagetable. */
-static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
+static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
 {
 	struct page *page = pfn_to_page(pfn);
 
@@ -856,12 +856,12 @@
 	}
 }
 
-static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PTE);
 }
 
-static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
@@ -909,7 +909,7 @@
 }
 
 /* This should never happen until we're OK to use struct page */
-static void xen_release_ptpage(u32 pfn, unsigned level)
+static void xen_release_ptpage(unsigned long pfn, unsigned level)
 {
 	struct page *page = pfn_to_page(pfn);
 
@@ -923,23 +923,23 @@
 	}
 }
 
-static void xen_release_pte(u32 pfn)
+static void xen_release_pte(unsigned long pfn)
 {
 	xen_release_ptpage(pfn, PT_PTE);
 }
 
-static void xen_release_pmd(u32 pfn)
+static void xen_release_pmd(unsigned long pfn)
 {
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
 #if PAGETABLE_LEVELS == 4
-static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PUD);
 }
 
-static void xen_release_pud(u32 pfn)
+static void xen_release_pud(unsigned long pfn)
 {
 	xen_release_ptpage(pfn, PT_PUD);
 }

diff --git a/block/Makefile b/block/Makefile
index 208000b..bfe7304 100644
--- a/block/Makefile
+++ b/block/Makefile

@@ -4,8 +4,8 @@
 
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
-			blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
-			cmd-filter.o
+			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+			ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o

diff --git a/block/as-iosched.c b/block/as-iosched.c
index cf4eb0e..71f0abb 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c

@@ -462,7 +462,7 @@
 			del_timer(&ad->antic_timer);
 		ad->antic_status = ANTIC_FINISHED;
 		/* see as_work_handler */
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(ad->q, &ad->antic_work);
 	}
 }
 
@@ -483,7 +483,7 @@
 		aic = ad->io_context->aic;
 
 		ad->antic_status = ANTIC_FINISHED;
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 
 		if (aic->ttime_samples == 0) {
 			/* process anticipated on has exited or timed out*/
@@ -745,6 +745,14 @@
  */
 static int as_can_anticipate(struct as_data *ad, struct request *rq)
 {
+#if 0 /* disable for now, we need to check tag level as well */
+	/*
+	 * SSD device without seek penalty, disable idling
+	 */
+	if (blk_queue_nonrot(ad->q))
+		return 0;
+#endif
+
 	if (!ad->io_context)
 		/*
 		 * Last request submitted was a write
@@ -844,7 +852,7 @@
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
 		ad->current_batch_expires = jiffies +
 					ad->batch_expire[ad->batch_data_dir];
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 		ad->changed_batch = 0;
 
 		if (ad->batch_data_dir == REQ_SYNC)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index a09ead1..5c99ff8 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c

@@ -293,7 +293,7 @@
 	bio->bi_end_io = bio_end_empty_barrier;
 	bio->bi_private = &wait;
 	bio->bi_bdev = bdev;
-	submit_bio(1 << BIO_RW_BARRIER, bio);
+	submit_bio(WRITE_BARRIER, bio);
 
 	wait_for_completion(&wait);
 
@@ -315,3 +315,73 @@
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+
+	bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev:	blockdev to issue discard for
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to discard
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ *    Issue a discard request for the sectors in question. Does not wait.
+ */
+int blkdev_issue_discard(struct block_device *bdev,
+			 sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
+{
+	struct request_queue *q;
+	struct bio *bio;
+	int ret = 0;
+
+	if (bdev->bd_disk == NULL)
+		return -ENXIO;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	if (!q->prepare_discard_fn)
+		return -EOPNOTSUPP;
+
+	while (nr_sects && !ret) {
+		bio = bio_alloc(gfp_mask, 0);
+		if (!bio)
+			return -ENOMEM;
+
+		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_bdev = bdev;
+
+		bio->bi_sector = sector;
+
+		if (nr_sects > q->max_hw_sectors) {
+			bio->bi_size = q->max_hw_sectors << 9;
+			nr_sects -= q->max_hw_sectors;
+			sector += q->max_hw_sectors;
+		} else {
+			bio->bi_size = nr_sects << 9;
+			nr_sects = 0;
+		}
+		bio_get(bio);
+		submit_bio(DISCARD_BARRIER, bio);
+
+		/* Check if it failed immediately */
+		if (bio_flagged(bio, BIO_EOPNOTSUPP))
+			ret = -EOPNOTSUPP;
+		else if (!bio_flagged(bio, BIO_UPTODATE))
+			ret = -EIO;
+		bio_put(bio);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);

diff --git a/block/blk-core.c b/block/blk-core.c
index 2cba5ef..2d053b5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c

@@ -26,8 +26,6 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
 
@@ -50,27 +48,26 @@
  */
 static struct workqueue_struct *kblockd_workqueue;
 
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
-
 static void drive_stat_acct(struct request *rq, int new_io)
 {
 	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
+	int cpu;
 
 	if (!blk_fs_request(rq) || !rq->rq_disk)
 		return;
 
-	part = get_part(rq->rq_disk, rq->sector);
+	cpu = part_stat_lock();
+	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+
 	if (!new_io)
-		__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
+		part_stat_inc(cpu, part, merges[rw]);
 	else {
-		disk_round_stats(rq->rq_disk);
-		rq->rq_disk->in_flight++;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight++;
-		}
+		part_round_stats(cpu, part);
+		part_inc_in_flight(part);
 	}
+
+	part_stat_unlock();
 }
 
 void blk_queue_congestion_threshold(struct request_queue *q)
@@ -113,7 +110,8 @@
 	memset(rq, 0, sizeof(*rq));
 
 	INIT_LIST_HEAD(&rq->queuelist);
-	INIT_LIST_HEAD(&rq->donelist);
+	INIT_LIST_HEAD(&rq->timeout_list);
+	rq->cpu = -1;
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
@@ -308,7 +306,7 @@
 	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
 
-	kblockd_schedule_work(&q->unplug_work);
+	kblockd_schedule_work(q, &q->unplug_work);
 }
 
 void blk_unplug(struct request_queue *q)
@@ -325,6 +323,21 @@
 }
 EXPORT_SYMBOL(blk_unplug);
 
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+	/*
+	 * one level of recursion is ok and is much faster than kicking
+	 * the unplug handling
+	 */
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
+}
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -339,18 +352,7 @@
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		blk_plug_device(q);
-		kblockd_schedule_work(&q->unplug_work);
-	}
+	blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -408,15 +410,8 @@
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q)) {
-		if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-			q->request_fn(q);
-			queue_flag_clear(QUEUE_FLAG_REENTER, q);
-		} else {
-			blk_plug_device(q);
-			kblockd_schedule_work(&q->unplug_work);
-		}
-	}
+	if (!elv_queue_empty(q))
+		blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -441,6 +436,14 @@
 
 void blk_cleanup_queue(struct request_queue *q)
 {
+	/*
+	 * We know we have process context here, so we can be a little
+	 * cautious and ensure that pending block actions on this device
+	 * are done before moving on. Going into this function, we should
+	 * not have processes doing IO to this device.
+	 */
+	blk_sync_queue(q);
+
 	mutex_lock(&q->sysfs_lock);
 	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 	mutex_unlock(&q->sysfs_lock);
@@ -496,6 +499,8 @@
 	}
 
 	init_timer(&q->unplug_timer);
+	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+	INIT_LIST_HEAD(&q->timeout_list);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
@@ -531,7 +536,7 @@
  *    request queue; this lock will be taken also from interrupt context, so irq
  *    disabling is needed for it.
  *
- *    Function returns a pointer to the initialized request queue, or NULL if
+ *    Function returns a pointer to the initialized request queue, or %NULL if
  *    it didn't succeed.
  *
  * Note:
@@ -569,7 +574,8 @@
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
-	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER);
+	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER |
+				   1 << QUEUE_FLAG_STACKABLE);
 	q->queue_lock		= lock;
 
 	blk_queue_segment_boundary(q, 0xffffffff);
@@ -624,10 +630,6 @@
 
 	blk_rq_init(q, rq);
 
-	/*
-	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
-	 * see bio.h and blkdev.h
-	 */
 	rq->cmd_flags = rw | REQ_ALLOCED;
 
 	if (priv) {
@@ -888,9 +890,11 @@
  */
 void blk_start_queueing(struct request_queue *q)
 {
-	if (!blk_queue_plugged(q))
+	if (!blk_queue_plugged(q)) {
+		if (unlikely(blk_queue_stopped(q)))
+			return;
 		q->request_fn(q);
-	else
+	} else
 		__generic_unplug_device(q);
 }
 EXPORT_SYMBOL(blk_start_queueing);
@@ -907,6 +911,8 @@
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+	blk_delete_timer(rq);
+	blk_clear_rq_complete(rq);
 	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
 
 	if (blk_rq_tagged(rq))
@@ -917,7 +923,7 @@
 EXPORT_SYMBOL(blk_requeue_request);
 
 /**
- * blk_insert_request - insert a special request in to a request queue
+ * blk_insert_request - insert a special request into a request queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
  * @at_head:	insert request at head or tail of queue
@@ -927,8 +933,8 @@
  *    Many block devices need to execute commands asynchronously, so they don't
  *    block the whole kernel from preemption during request execution.  This is
  *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_SPECIAL in to the corresponding request queue, and letting them be
- *    scheduled for actual execution by the request queue.
+ *    REQ_TYPE_SPECIAL into the corresponding request queue, and letting them
+ *    be scheduled for actual execution by the request queue.
  *
  *    We have the option of inserting the head or the tail of the queue.
  *    Typically we use the tail for new ioctls and so forth.  We use the head
@@ -982,8 +988,22 @@
 	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
 }
 
-/*
- * disk_round_stats()	- Round off the performance stats on a struct
+static void part_round_stats_single(int cpu, struct hd_struct *part,
+				    unsigned long now)
+{
+	if (now == part->stamp)
+		return;
+
+	if (part->in_flight) {
+		__part_stat_add(cpu, part, time_in_queue,
+				part->in_flight * (now - part->stamp));
+		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
+	}
+	part->stamp = now;
+}
+
+/**
+ * part_round_stats()	- Round off the performance stats on a struct
  * disk_stats.
  *
  * The average IO queue length and utilisation statistics are maintained
@@ -997,36 +1017,15 @@
  * /proc/diskstats.  This accounts immediately for all queue usage up to
  * the current jiffies and restarts the counters again.
  */
-void disk_round_stats(struct gendisk *disk)
+void part_round_stats(int cpu, struct hd_struct *part)
 {
 	unsigned long now = jiffies;
 
-	if (now == disk->stamp)
-		return;
-
-	if (disk->in_flight) {
-		__disk_stat_add(disk, time_in_queue,
-				disk->in_flight * (now - disk->stamp));
-		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
-	}
-	disk->stamp = now;
+	if (part->partno)
+		part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
+	part_round_stats_single(cpu, part, now);
 }
-EXPORT_SYMBOL_GPL(disk_round_stats);
-
-void part_round_stats(struct hd_struct *part)
-{
-	unsigned long now = jiffies;
-
-	if (now == part->stamp)
-		return;
-
-	if (part->in_flight) {
-		__part_stat_add(part, time_in_queue,
-				part->in_flight * (now - part->stamp));
-		__part_stat_add(part, io_ticks, (now - part->stamp));
-	}
-	part->stamp = now;
-}
+EXPORT_SYMBOL_GPL(part_round_stats);
 
 /*
  * queue lock must be held
@@ -1070,6 +1069,7 @@
 
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
+	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
 	/*
@@ -1081,7 +1081,12 @@
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
-	if (unlikely(bio_barrier(bio)))
+	if (unlikely(bio_discard(bio))) {
+		req->cmd_flags |= REQ_DISCARD;
+		if (bio_barrier(bio))
+			req->cmd_flags |= REQ_SOFTBARRIER;
+		req->q->prepare_discard_fn(req->q, req);
+	} else if (unlikely(bio_barrier(bio)))
 		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
@@ -1099,7 +1104,7 @@
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors, barrier, err;
+	int el_ret, nr_sectors, barrier, discard, err;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	int rw_flags;
@@ -1114,7 +1119,14 @@
 	blk_queue_bounce(q, &bio);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && bio_has_data(bio) &&
+	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+		err = -EOPNOTSUPP;
+		goto end_io;
+	}
+
+	discard = bio_discard(bio);
+	if (unlikely(discard) && !q->prepare_discard_fn) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -1138,6 +1150,8 @@
 		req->biotail = bio;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_back_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1165,6 +1179,8 @@
 		req->sector = req->hard_sector = bio->bi_sector;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_front_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1200,13 +1216,15 @@
 	init_request_from_bio(req, bio);
 
 	spin_lock_irq(q->queue_lock);
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = blk_cpu_to_group(smp_processor_id());
 	if (elv_queue_empty(q))
 		blk_plug_device(q);
 	add_request(q, req);
 out:
 	if (sync)
 		__generic_unplug_device(q);
-
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 
@@ -1260,8 +1278,9 @@
 
 static int should_fail_request(struct bio *bio)
 {
-	if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
-	    (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
+	struct hd_struct *part = bio->bi_bdev->bd_part;
+
+	if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
 		return should_fail(&fail_make_request, bio->bi_size);
 
 	return 0;
@@ -1314,7 +1333,7 @@
 }
 
 /**
- * generic_make_request: hand a buffer to its device driver for I/O
+ * generic_make_request - hand a buffer to its device driver for I/O
  * @bio:  The bio describing the location in memory and on the device.
  *
  * generic_make_request() is used to make I/O requests of block
@@ -1409,7 +1428,8 @@
 
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
-		if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+		if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
+		    (bio_discard(bio) && !q->prepare_discard_fn)) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
@@ -1471,13 +1491,13 @@
 EXPORT_SYMBOL(generic_make_request);
 
 /**
- * submit_bio: submit a bio to the block device layer for I/O
+ * submit_bio - submit a bio to the block device layer for I/O
  * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bio: The &struct bio which describes the I/O
  *
  * submit_bio() is very similar in purpose to generic_make_request(), and
  * uses that function to do most of the work. Both are fairly rough
- * interfaces, @bio must be presetup and ready for I/O.
+ * interfaces; @bio must be presetup and ready for I/O.
  *
  */
 void submit_bio(int rw, struct bio *bio)
@@ -1490,11 +1510,7 @@
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
 	 */
-	if (!bio_empty_barrier(bio)) {
-
-		BIO_BUG_ON(!bio->bi_size);
-		BIO_BUG_ON(!bio->bi_io_vec);
-
+	if (bio_has_data(bio)) {
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
@@ -1517,9 +1533,90 @@
 EXPORT_SYMBOL(submit_bio);
 
 /**
+ * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * @q:  the queue
+ * @rq: the request being checked
+ *
+ * Description:
+ *    @rq may have been made based on weaker limitations of upper-level queues
+ *    in request stacking drivers, and it may violate the limitation of @q.
+ *    Since the block layer and the underlying device driver trust @rq
+ *    after it is inserted to @q, it should be checked against @q before
+ *    the insertion using this generic function.
+ *
+ *    This function should also be useful for request stacking drivers
+ *    in some cases below, so export this function.
+ *    Request stacking drivers like request-based dm may change the queue
+ *    limits while requests are in the queue (e.g. dm's table swapping).
+ *    Such request stacking drivers should check those requests against
+ *    the new queue limits again when they dispatch those requests,
+ *    although such checkings are also done against the old queue limits
+ *    when submitting requests.
+ */
+int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+{
+	if (rq->nr_sectors > q->max_sectors ||
+	    rq->data_len > q->max_hw_sectors << 9) {
+		printk(KERN_ERR "%s: over max size limit.\n", __func__);
+		return -EIO;
+	}
+
+	/*
+	 * queue's settings related to segment counting like q->bounce_pfn
+	 * may differ from that of other stacking queues.
+	 * Recalculate it to check the request correctly on this queue's
+	 * limitation.
+	 */
+	blk_recalc_rq_segments(rq);
+	if (rq->nr_phys_segments > q->max_phys_segments ||
+	    rq->nr_phys_segments > q->max_hw_segments) {
+		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_rq_check_limits);
+
+/**
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @q:  the queue to submit the request
+ * @rq: the request being queued
+ */
+int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+{
+	unsigned long flags;
+
+	if (blk_rq_check_limits(q, rq))
+		return -EIO;
+
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
+	    should_fail(&fail_make_request, blk_rq_bytes(rq)))
+		return -EIO;
+#endif
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	/*
+	 * Submitting request must be dequeued before calling this function
+	 * because it will be linked to another request_queue
+	 */
+	BUG_ON(blk_queued_rq(rq));
+
+	drive_stat_acct(rq, 1);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
+
+/**
  * __end_that_request_first - end I/O on a request
  * @req:      the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -1527,8 +1624,8 @@
  *     for the next range of segments (if any) in the cluster.
  *
  * Return:
- *     0 - we are done with this request, call end_that_request_last()
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request, call end_that_request_last()
+ *     %1 - still buffers pending for this request
  **/
 static int __end_that_request_first(struct request *req, int error,
 				    int nr_bytes)
@@ -1539,7 +1636,7 @@
 	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
 
 	/*
-	 * for a REQ_BLOCK_PC request, we want to carry any eventual
+	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
 	 * sense key with us all the way through
 	 */
 	if (!blk_pc_request(req))
@@ -1552,11 +1649,14 @@
 	}
 
 	if (blk_fs_request(req) && req->rq_disk) {
-		struct hd_struct *part = get_part(req->rq_disk, req->sector);
 		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
 
-		all_stat_add(req->rq_disk, part, sectors[rw],
-				nr_bytes >> 9, req->sector);
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
+		part_stat_unlock();
 	}
 
 	total_bytes = bio_nbytes = 0;
@@ -1641,88 +1741,14 @@
 }
 
 /*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
-	struct list_head *cpu_list, local_list;
-
-	local_irq_disable();
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_replace_init(cpu_list, &local_list);
-	local_irq_enable();
-
-	while (!list_empty(&local_list)) {
-		struct request *rq;
-
-		rq = list_entry(local_list.next, struct request, donelist);
-		list_del_init(&rq->donelist);
-		rq->q->softirq_done_fn(rq);
-	}
-}
-
-static int __cpuinit blk_cpu_notify(struct notifier_block *self,
-				    unsigned long action, void *hcpu)
-{
-	/*
-	 * If a CPU goes away, splice its entries to the current CPU
-	 * and trigger a run of the softirq
-	 */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		int cpu = (unsigned long) hcpu;
-
-		local_irq_disable();
-		list_splice_init(&per_cpu(blk_cpu_done, cpu),
-				 &__get_cpu_var(blk_cpu_done));
-		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-		local_irq_enable();
-	}
-
-	return NOTIFY_OK;
-}
-
-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
-	.notifier_call	= blk_cpu_notify,
-};
-
-/**
- * blk_complete_request - end I/O on a request
- * @req:      the request being processed
- *
- * Description:
- *     Ends all I/O on a request. It does not handle partial completions,
- *     unless the driver actually implements this in its completion callback
- *     through requeueing. The actual completion happens out-of-order,
- *     through a softirq handler. The user must have registered a completion
- *     callback through blk_queue_softirq_done().
- **/
-
-void blk_complete_request(struct request *req)
-{
-	struct list_head *cpu_list;
-	unsigned long flags;
-
-	BUG_ON(!req->q->softirq_done_fn);
-
-	local_irq_save(flags);
-
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_add_tail(&req->donelist, cpu_list);
-	raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_complete_request);
-
-/*
  * queue lock must be held
  */
 static void end_that_request_last(struct request *req, int error)
 {
 	struct gendisk *disk = req->rq_disk;
 
+	blk_delete_timer(req);
+
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
@@ -1740,16 +1766,18 @@
 	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
-		struct hd_struct *part = get_part(disk, req->sector);
+		struct hd_struct *part;
+		int cpu;
 
-		__all_stat_inc(disk, part, ios[rw], req->sector);
-		__all_stat_add(disk, part, ticks[rw], duration, req->sector);
-		disk_round_stats(disk);
-		disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight--;
-		}
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(disk, req->sector);
+
+		part_stat_inc(cpu, part, ios[rw]);
+		part_stat_add(cpu, part, ticks[rw], duration);
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
 	}
 
 	if (req->end_io)
@@ -1762,17 +1790,6 @@
 	}
 }
 
-static inline void __end_request(struct request *rq, int uptodate,
-				 unsigned int nr_bytes)
-{
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_request(rq, error, nr_bytes);
-}
-
 /**
  * blk_rq_bytes - Returns bytes left to complete in the entire request
  * @rq: the request being processed
@@ -1803,92 +1820,36 @@
 EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
 
 /**
- * end_queued_request - end all I/O on a queued request
- * @rq:		the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
- *
- * Description:
- *     Ends all I/O on a request, and removes it from the block layer queues.
- *     Not suitable for normal IO completion, unless the driver still has
- *     the request attached to the block layer.
- *
- **/
-void end_queued_request(struct request *rq, int uptodate)
-{
-	__end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_queued_request);
-
-/**
- * end_dequeued_request - end all I/O on a dequeued request
- * @rq:		the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
- *
- * Description:
- *     Ends all I/O on a request. The request must already have been
- *     dequeued using blkdev_dequeue_request(), as is normally the case
- *     for most drivers.
- *
- **/
-void end_dequeued_request(struct request *rq, int uptodate)
-{
-	__end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_dequeued_request);
-
-
-/**
  * end_request - end I/O on the current segment of the request
  * @req:	the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
+ * @uptodate:	error value or %0/%1 uptodate flag
  *
  * Description:
  *     Ends I/O on the current segment of a request. If that is the only
  *     remaining segment, the request is also completed and freed.
  *
- *     This is a remnant of how older block drivers handled IO completions.
- *     Modern drivers typically end IO on the full request in one go, unless
+ *     This is a remnant of how older block drivers handled I/O completions.
+ *     Modern drivers typically end I/O on the full request in one go, unless
  *     they have a residual value to account for. For that case this function
  *     isn't really useful, unless the residual just happens to be the
  *     full current segment. In other words, don't use this function in new
- *     code. Either use end_request_completely(), or the
- *     end_that_request_chunk() (along with end_that_request_last()) for
- *     partial completions.
- *
+ *     code. Use blk_end_request() or __blk_end_request() to end a request.
  **/
 void end_request(struct request *req, int uptodate)
 {
-	__end_request(req, uptodate, req->hard_cur_sectors << 9);
+	int error = 0;
+
+	if (uptodate <= 0)
+		error = uptodate ? uptodate : -EIO;
+
+	__blk_end_request(req, error, req->hard_cur_sectors << 9);
 }
 EXPORT_SYMBOL(end_request);
 
-/**
- * blk_end_io - Generic end_io function to complete a request.
- * @rq:           the request being processed
- * @error:        0 for success, < 0 for error
- * @nr_bytes:     number of bytes to complete @rq
- * @bidi_bytes:   number of bytes to complete @rq->next_rq
- * @drv_callback: function called between completion of bios in the request
- *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
- *                completion of the request.
- *
- * Description:
- *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
- *     If @rq has leftover, sets it up for the next range of segments.
- *
- * Return:
- *     0 - we are done with this request
- *     1 - this request is not freed yet, it still has pending buffers.
- **/
-static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
-		      unsigned int bidi_bytes,
-		      int (drv_callback)(struct request *))
+static int end_that_request_data(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
-	struct request_queue *q = rq->q;
-	unsigned long flags = 0UL;
-
-	if (blk_fs_request(rq) || blk_pc_request(rq)) {
+	if (rq->bio) {
 		if (__end_that_request_first(rq, error, nr_bytes))
 			return 1;
 
@@ -1898,6 +1859,38 @@
 			return 1;
 	}
 
+	return 0;
+}
+
+/**
+ * blk_end_io - Generic end_io function to complete a request.
+ * @rq:           the request being processed
+ * @error:        %0 for success, < %0 for error
+ * @nr_bytes:     number of bytes to complete @rq
+ * @bidi_bytes:   number of bytes to complete @rq->next_rq
+ * @drv_callback: function called between completion of bios in the request
+ *                and completion of the request.
+ *                If the callback returns non %0, this helper returns without
+ *                completion of the request.
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ * Return:
+ *     %0 - we are done with this request
+ *     %1 - this request is not freed yet, it still has pending buffers.
+ **/
+static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
+		      unsigned int bidi_bytes,
+		      int (drv_callback)(struct request *))
+{
+	struct request_queue *q = rq->q;
+	unsigned long flags = 0UL;
+
+	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
+		return 1;
+
 	/* Special feature for tricky drivers */
 	if (drv_callback && drv_callback(rq))
 		return 1;
@@ -1914,7 +1907,7 @@
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -1922,8 +1915,8 @@
  *     If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
@@ -1934,22 +1927,20 @@
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
  *     Must be called with queue lock held unlike blk_end_request().
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	if (blk_fs_request(rq) || blk_pc_request(rq)) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-	}
+	if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
+		return 1;
 
 	add_disk_randomness(rq->rq_disk);
 
@@ -1962,7 +1953,7 @@
 /**
  * blk_end_bidi_request - Helper function for drivers to complete bidi request.
  * @rq:         the bidi request being processed
- * @error:      0 for success, < 0 for error
+ * @error:      %0 for success, < %0 for error
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -1970,8 +1961,8 @@
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 			 unsigned int bidi_bytes)
@@ -1981,13 +1972,43 @@
 EXPORT_SYMBOL_GPL(blk_end_bidi_request);
 
 /**
+ * blk_update_request - Special helper function for request stacking drivers
+ * @rq:           the request being processed
+ * @error:        %0 for success, < %0 for error
+ * @nr_bytes:     number of bytes to complete @rq
+ *
+ * Description:
+ *     Ends I/O on a number of bytes attached to @rq, but doesn't complete
+ *     the request structure even if @rq doesn't have leftover.
+ *     If @rq has leftover, sets it up for the next range of segments.
+ *
+ *     This special helper function is only for request stacking drivers
+ *     (e.g. request-based dm) so that they can handle partial completion.
+ *     Actual device drivers should use blk_end_request instead.
+ */
+void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
+{
+	if (!end_that_request_data(rq, error, nr_bytes, 0)) {
+		/*
+		 * These members are not updated in end_that_request_data()
+		 * when all bios are completed.
+		 * Update them so that the request stacking driver can find
+		 * how many bytes remain in the request later.
+		 */
+		rq->nr_sectors = rq->hard_nr_sectors = 0;
+		rq->current_nr_sectors = rq->hard_cur_sectors = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(blk_update_request);
+
+/**
  * blk_end_request_callback - Special helper function for tricky drivers
  * @rq:           the request being processed
- * @error:        0 for success, < 0 for error
+ * @error:        %0 for success, < %0 for error
  * @nr_bytes:     number of bytes to complete
  * @drv_callback: function called between completion of bios in the request
  *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
+ *                If the callback returns non %0, this helper returns without
  *                completion of the request.
  *
  * Description:
@@ -2000,10 +2021,10 @@
  *     Don't use this interface in other places anymore.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - this request is not freed yet.
- *         this request still has pending buffers or
- *         the driver doesn't want to finish this request yet.
+ *     %0 - we are done with this request
+ *     %1 - this request is not freed yet.
+ *          this request still has pending buffers or
+ *          the driver doesn't want to finish this request yet.
  **/
 int blk_end_request_callback(struct request *rq, int error,
 			     unsigned int nr_bytes,
@@ -2016,15 +2037,17 @@
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
-	/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
+	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
+	   we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
 	rq->cmd_flags |= (bio->bi_rw & 3);
 
-	rq->nr_phys_segments = bio_phys_segments(q, bio);
-	rq->nr_hw_segments = bio_hw_segments(q, bio);
+	if (bio_has_data(bio)) {
+		rq->nr_phys_segments = bio_phys_segments(q, bio);
+		rq->buffer = bio_data(bio);
+	}
 	rq->current_nr_sectors = bio_cur_sectors(bio);
 	rq->hard_cur_sectors = rq->current_nr_sectors;
 	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
-	rq->buffer = bio_data(bio);
 	rq->data_len = bio->bi_size;
 
 	rq->bio = rq->biotail = bio;
@@ -2033,7 +2056,35 @@
 		rq->rq_disk = bio->bi_bdev->bd_disk;
 }
 
-int kblockd_schedule_work(struct work_struct *work)
+/**
+ * blk_lld_busy - Check if underlying low-level drivers of a device are busy
+ * @q : the queue of the device being checked
+ *
+ * Description:
+ *    Check if underlying low-level drivers of a device are busy.
+ *    If the drivers want to export their busy state, they must first set
+ *    their own exporting function using blk_queue_lld_busy().
+ *
+ *    Basically, this function is used only by request stacking drivers
+ *    to stop dispatching requests to underlying devices when underlying
+ *    devices are busy.  Holding requests back allows more I/O merging on
+ *    the queue of the request stacking driver and prevents I/O throughput
+ *    regression under bursty I/O load.
+ *
+ * Return:
+ *    0 - Not busy (The request stacking driver should dispatch request)
+ *    1 - Busy (The request stacking driver should stop dispatching request)
+ */
+int blk_lld_busy(struct request_queue *q)
+{
+	if (q->lld_busy_fn)
+		return q->lld_busy_fn(q);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_lld_busy);
+
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
 }
@@ -2047,8 +2098,6 @@
 
 int __init blk_dev_init(void)
 {
-	int i;
-
 	kblockd_workqueue = create_workqueue("kblockd");
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
@@ -2059,12 +2108,6 @@
 	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
 			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
 
-	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
-
-	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
-	register_hotcpu_notifier(&blk_cpu_notifier);
-
 	return 0;
 }
 

diff --git a/block/blk-exec.c b/block/blk-exec.c
index 9bceff7..6af716d 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c

@@ -16,7 +16,7 @@
 /**
  * blk_end_sync_rq - executes a completion event on a request
  * @rq: request to complete
- * @error: end io status of the request
+ * @error: end I/O status of the request
  */
 static void blk_end_sync_rq(struct request *rq, int error)
 {
@@ -41,7 +41,7 @@
  * @done:	I/O completion handler
  *
  * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
  *    for execution.  Don't wait for completion.
  */
 void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
@@ -72,7 +72,7 @@
  * @at_head:    insert request at head or tail of queue
  *
  * Description:
- *    Insert a fully prepared request at the back of the io scheduler queue
+ *    Insert a fully prepared request at the back of the I/O scheduler queue
  *    for execution and wait for completion.
  */
 int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,

diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 3f1a847..61a8e2f 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c

@@ -108,51 +108,51 @@
 EXPORT_SYMBOL(blk_rq_map_integrity_sg);
 
 /**
- * blk_integrity_compare - Compare integrity profile of two block devices
- * @b1:		Device to compare
- * @b2:		Device to compare
+ * blk_integrity_compare - Compare integrity profile of two disks
+ * @gd1:	Disk to compare
+ * @gd2:	Disk to compare
  *
  * Description: Meta-devices like DM and MD need to verify that all
  * sub-devices use the same integrity format before advertising to
  * upper layers that they can send/receive integrity metadata.  This
- * function can be used to check whether two block devices have
+ * function can be used to check whether two gendisk devices have
  * compatible integrity formats.
  */
-int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
+int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
 {
-	struct blk_integrity *b1 = bd1->bd_disk->integrity;
-	struct blk_integrity *b2 = bd2->bd_disk->integrity;
+	struct blk_integrity *b1 = gd1->integrity;
+	struct blk_integrity *b2 = gd2->integrity;
 
-	BUG_ON(bd1->bd_disk == NULL);
-	BUG_ON(bd2->bd_disk == NULL);
+	if (!b1 && !b2)
+		return 0;
 
 	if (!b1 || !b2)
-		return 0;
+		return -1;
 
 	if (b1->sector_size != b2->sector_size) {
 		printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
 		       b1->sector_size, b2->sector_size);
 		return -1;
 	}
 
 	if (b1->tuple_size != b2->tuple_size) {
 		printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
 		       b1->tuple_size, b2->tuple_size);
 		return -1;
 	}
 
 	if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
 		printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
 		       b1->tag_size, b2->tag_size);
 		return -1;
 	}
 
 	if (strcmp(b1->name, b2->name)) {
 		printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
-		       bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+		       gd1->disk_name, gd2->disk_name,
 		       b1->name, b2->name);
 		return -1;
 	}
@@ -331,7 +331,8 @@
 			return -1;
 
 		if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
-					 &disk->dev.kobj, "%s", "integrity")) {
+					 &disk_to_dev(disk)->kobj,
+					 "%s", "integrity")) {
 			kmem_cache_free(integrity_cachep, bi);
 			return -1;
 		}
@@ -375,7 +376,7 @@
 
 	kobject_uevent(&bi->kobj, KOBJ_REMOVE);
 	kobject_del(&bi->kobj);
-	kobject_put(&disk->dev.kobj);
 	kmem_cache_free(integrity_cachep, bi);
+	disk->integrity = NULL;
 }
 EXPORT_SYMBOL(blk_integrity_unregister);

diff --git a/block/blk-map.c b/block/blk-map.c
index af37e4a..4849fa3 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c

@@ -41,10 +41,10 @@
 }
 
 static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
-			     void __user *ubuf, unsigned int len)
+			     struct rq_map_data *map_data, void __user *ubuf,
+			     unsigned int len, int null_mapped, gfp_t gfp_mask)
 {
 	unsigned long uaddr;
-	unsigned int alignment;
 	struct bio *bio, *orig_bio;
 	int reading, ret;
 
@@ -55,15 +55,17 @@
 	 * direct dma. else, set up kernel bounce buffers
 	 */
 	uaddr = (unsigned long) ubuf;
-	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-	if (!(uaddr & alignment) && !(len & alignment))
-		bio = bio_map_user(q, NULL, uaddr, len, reading);
+	if (blk_rq_aligned(q, ubuf, len) && !map_data)
+		bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
 	else
-		bio = bio_copy_user(q, uaddr, len, reading);
+		bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
 
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
+	if (null_mapped)
+		bio->bi_flags |= (1 << BIO_NULL_MAPPED);
+
 	orig_bio = bio;
 	blk_queue_bounce(q, &bio);
 
@@ -85,17 +87,19 @@
 }
 
 /**
- * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
  * @rq:		request structure to fill
+ * @map_data:   pointer to the rq_map_data holding pages (if necessary)
  * @ubuf:	the user buffer
  * @len:	length of user data
+ * @gfp_mask:	memory allocation flags
  *
  * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
  *    a kernel bounce buffer is used.
  *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
  *    still in process context.
  *
  *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -105,16 +109,22 @@
  *    unmapping.
  */
 int blk_rq_map_user(struct request_queue *q, struct request *rq,
-		    void __user *ubuf, unsigned long len)
+		    struct rq_map_data *map_data, void __user *ubuf,
+		    unsigned long len, gfp_t gfp_mask)
 {
 	unsigned long bytes_read = 0;
 	struct bio *bio = NULL;
-	int ret;
+	int ret, null_mapped = 0;
 
 	if (len > (q->max_hw_sectors << 9))
 		return -EINVAL;
-	if (!len || !ubuf)
+	if (!len)
 		return -EINVAL;
+	if (!ubuf) {
+		if (!map_data || rq_data_dir(rq) != READ)
+			return -EINVAL;
+		null_mapped = 1;
+	}
 
 	while (bytes_read != len) {
 		unsigned long map_len, end, start;
@@ -132,7 +142,8 @@
 		if (end - start > BIO_MAX_PAGES)
 			map_len -= PAGE_SIZE;
 
-		ret = __blk_rq_map_user(q, rq, ubuf, map_len);
+		ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
+					null_mapped, gfp_mask);
 		if (ret < 0)
 			goto unmap_rq;
 		if (!bio)
@@ -154,18 +165,20 @@
 EXPORT_SYMBOL(blk_rq_map_user);
 
 /**
- * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
  * @rq:		request to map data to
+ * @map_data:   pointer to the rq_map_data holding pages (if necessary)
  * @iov:	pointer to the iovec
  * @iov_count:	number of elements in the iovec
  * @len:	I/O byte count
+ * @gfp_mask:	memory allocation flags
  *
  * Description:
- *    Data will be mapped directly for zero copy io, if possible. Otherwise
+ *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
  *    a kernel bounce buffer is used.
  *
- *    A matching blk_rq_unmap_user() must be issued at the end of io, while
+ *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
  *    still in process context.
  *
  *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -175,7 +188,8 @@
  *    unmapping.
  */
 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
-			struct sg_iovec *iov, int iov_count, unsigned int len)
+			struct rq_map_data *map_data, struct sg_iovec *iov,
+			int iov_count, unsigned int len, gfp_t gfp_mask)
 {
 	struct bio *bio;
 	int i, read = rq_data_dir(rq) == READ;
@@ -193,10 +207,11 @@
 		}
 	}
 
-	if (unaligned || (q->dma_pad_mask & len))
-		bio = bio_copy_user_iov(q, iov, iov_count, read);
+	if (unaligned || (q->dma_pad_mask & len) || map_data)
+		bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
+					gfp_mask);
 	else
-		bio = bio_map_user_iov(q, NULL, iov, iov_count, read);
+		bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
 
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
@@ -216,6 +231,7 @@
 	rq->buffer = rq->data = NULL;
 	return 0;
 }
+EXPORT_SYMBOL(blk_rq_map_user_iov);
 
 /**
  * blk_rq_unmap_user - unmap a request with user data
@@ -224,7 +240,7 @@
  * Description:
  *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
  *    supply the original rq->bio from the blk_rq_map_user() return, since
- *    the io completion may have changed rq->bio.
+ *    the I/O completion may have changed rq->bio.
  */
 int blk_rq_unmap_user(struct bio *bio)
 {
@@ -250,7 +266,7 @@
 EXPORT_SYMBOL(blk_rq_unmap_user);
 
 /**
- * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage
+ * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
  * @q:		request queue where request should be inserted
  * @rq:		request to fill
  * @kbuf:	the kernel buffer
@@ -264,8 +280,6 @@
 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 		    unsigned int len, gfp_t gfp_mask)
 {
-	unsigned long kaddr;
-	unsigned int alignment;
 	int reading = rq_data_dir(rq) == READ;
 	int do_copy = 0;
 	struct bio *bio;
@@ -275,11 +289,7 @@
 	if (!len || !kbuf)
 		return -EINVAL;
 
-	kaddr = (unsigned long)kbuf;
-	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-	do_copy = ((kaddr & alignment) || (len & alignment) ||
-		   object_is_on_stack(kbuf));
-
+	do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf);
 	if (do_copy)
 		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
 	else

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5efc9e7..908d3e1 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c

@@ -11,7 +11,7 @@
 
 void blk_recalc_rq_sectors(struct request *rq, int nsect)
 {
-	if (blk_fs_request(rq)) {
+	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
 		rq->hard_sector += nsect;
 		rq->hard_nr_sectors -= nsect;
 
@@ -41,12 +41,9 @@
 void blk_recalc_rq_segments(struct request *rq)
 {
 	int nr_phys_segs;
-	int nr_hw_segs;
 	unsigned int phys_size;
-	unsigned int hw_size;
 	struct bio_vec *bv, *bvprv = NULL;
 	int seg_size;
-	int hw_seg_size;
 	int cluster;
 	struct req_iterator iter;
 	int high, highprv = 1;
@@ -56,8 +53,8 @@
 		return;
 
 	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
-	hw_seg_size = seg_size = 0;
-	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
+	seg_size = 0;
+	phys_size = nr_phys_segs = 0;
 	rq_for_each_segment(bv, rq, iter) {
 		/*
 		 * the trick here is making sure that a high page is never
@@ -66,7 +63,7 @@
 		 */
 		high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
 		if (high || highprv)
-			goto new_hw_segment;
+			goto new_segment;
 		if (cluster) {
 			if (seg_size + bv->bv_len > q->max_segment_size)
 				goto new_segment;
@@ -74,40 +71,19 @@
 				goto new_segment;
 			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
 				goto new_segment;
-			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-				goto new_hw_segment;
 
 			seg_size += bv->bv_len;
-			hw_seg_size += bv->bv_len;
 			bvprv = bv;
 			continue;
 		}
 new_segment:
-		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
-		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
-			hw_seg_size += bv->bv_len;
-		else {
-new_hw_segment:
-			if (nr_hw_segs == 1 &&
-			    hw_seg_size > rq->bio->bi_hw_front_size)
-				rq->bio->bi_hw_front_size = hw_seg_size;
-			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
-			nr_hw_segs++;
-		}
-
 		nr_phys_segs++;
 		bvprv = bv;
 		seg_size = bv->bv_len;
 		highprv = high;
 	}
 
-	if (nr_hw_segs == 1 &&
-	    hw_seg_size > rq->bio->bi_hw_front_size)
-		rq->bio->bi_hw_front_size = hw_seg_size;
-	if (hw_seg_size > rq->biotail->bi_hw_back_size)
-		rq->biotail->bi_hw_back_size = hw_seg_size;
 	rq->nr_phys_segments = nr_phys_segs;
-	rq->nr_hw_segments = nr_hw_segs;
 }
 
 void blk_recount_segments(struct request_queue *q, struct bio *bio)
@@ -120,7 +96,6 @@
 	blk_recalc_rq_segments(&rq);
 	bio->bi_next = nxt;
 	bio->bi_phys_segments = rq.nr_phys_segments;
-	bio->bi_hw_segments = rq.nr_hw_segments;
 	bio->bi_flags |= (1 << BIO_SEG_VALID);
 }
 EXPORT_SYMBOL(blk_recount_segments);
@@ -131,13 +106,17 @@
 	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
 		return 0;
 
-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
-		return 0;
 	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
 		return 0;
 
+	if (!bio_has_data(bio))
+		return 1;
+
+	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+		return 0;
+
 	/*
-	 * bio and nxt are contigous in memory, check if the queue allows
+	 * bio and nxt are contiguous in memory; check if the queue allows
 	 * these two to be merged into one
 	 */
 	if (BIO_SEG_BOUNDARY(q, bio, nxt))
@@ -146,22 +125,6 @@
 	return 0;
 }
 
-static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
-				 struct bio *nxt)
-{
-	if (!bio_flagged(bio, BIO_SEG_VALID))
-		blk_recount_segments(q, bio);
-	if (!bio_flagged(nxt, BIO_SEG_VALID))
-		blk_recount_segments(q, nxt);
-	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
-	    BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
-		return 0;
-	if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
-		return 0;
-
-	return 1;
-}
-
 /*
  * map a request to scatterlist, return number of sg entries setup. Caller
  * must make sure sg can hold rq->nr_phys_segments entries
@@ -275,10 +238,9 @@
 				    struct request *req,
 				    struct bio *bio)
 {
-	int nr_hw_segs = bio_hw_segments(q, bio);
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
-	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
+	if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
 	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
@@ -290,7 +252,6 @@
 	 * This will form the start of a new hw segment.  Bump both
 	 * counters.
 	 */
-	req->nr_hw_segments += nr_hw_segs;
 	req->nr_phys_segments += nr_phys_segs;
 	return 1;
 }
@@ -299,7 +260,6 @@
 		     struct bio *bio)
 {
 	unsigned short max_sectors;
-	int len;
 
 	if (unlikely(blk_pc_request(req)))
 		max_sectors = q->max_hw_sectors;
@@ -316,19 +276,6 @@
 		blk_recount_segments(q, req->biotail);
 	if (!bio_flagged(bio, BIO_SEG_VALID))
 		blk_recount_segments(q, bio);
-	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
-	    && !BIOVEC_VIRT_OVERSIZE(len)) {
-		int mergeable =  ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (req->nr_hw_segments == 1)
-				req->bio->bi_hw_front_size = len;
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}
 
 	return ll_new_hw_segment(q, req, bio);
 }
@@ -337,7 +284,6 @@
 		      struct bio *bio)
 {
 	unsigned short max_sectors;
-	int len;
 
 	if (unlikely(blk_pc_request(req)))
 		max_sectors = q->max_hw_sectors;
@@ -351,23 +297,10 @@
 			q->last_merge = NULL;
 		return 0;
 	}
-	len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
 	if (!bio_flagged(bio, BIO_SEG_VALID))
 		blk_recount_segments(q, bio);
 	if (!bio_flagged(req->bio, BIO_SEG_VALID))
 		blk_recount_segments(q, req->bio);
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
-	    !BIOVEC_VIRT_OVERSIZE(len)) {
-		int mergeable =  ll_new_mergeable(q, req, bio);
-
-		if (mergeable) {
-			if (bio->bi_hw_segments == 1)
-				bio->bi_hw_front_size = len;
-			if (req->nr_hw_segments == 1)
-				req->biotail->bi_hw_back_size = len;
-		}
-		return mergeable;
-	}
 
 	return ll_new_hw_segment(q, req, bio);
 }
@@ -376,7 +309,6 @@
 				struct request *next)
 {
 	int total_phys_segments;
-	int total_hw_segments;
 
 	/*
 	 * First check if the either of the requests are re-queued
@@ -398,26 +330,11 @@
 	if (total_phys_segments > q->max_phys_segments)
 		return 0;
 
-	total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
-	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
-		int len = req->biotail->bi_hw_back_size +
-				next->bio->bi_hw_front_size;
-		/*
-		 * propagate the combined length to the end of the requests
-		 */
-		if (req->nr_hw_segments == 1)
-			req->bio->bi_hw_front_size = len;
-		if (next->nr_hw_segments == 1)
-			next->biotail->bi_hw_back_size = len;
-		total_hw_segments--;
-	}
-
-	if (total_hw_segments > q->max_hw_segments)
+	if (total_phys_segments > q->max_hw_segments)
 		return 0;
 
 	/* Merge is OK... */
 	req->nr_phys_segments = total_phys_segments;
-	req->nr_hw_segments = total_hw_segments;
 	return 1;
 }
 
@@ -470,17 +387,21 @@
 	elv_merge_requests(q, req, next);
 
 	if (req->rq_disk) {
-		struct hd_struct *part
-			= get_part(req->rq_disk, req->sector);
-		disk_round_stats(req->rq_disk);
-		req->rq_disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight--;
-		}
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
 	}
 
 	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+	if (blk_rq_cpu_valid(next))
+		req->cpu = next->cpu;
 
 	__blk_put_request(q, next);
 	return 1;

diff --git a/block/blk-settings.c b/block/blk-settings.c
index dfc7701..b21dcdb 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c

@@ -33,6 +33,23 @@
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
 /**
+ * blk_queue_set_discard - set a prepare_discard function for queue
+ * @q:		queue
+ * @dfn:	prepare_discard function
+ *
+ * It's possible for a queue to register a discard callback which is used
+ * to transform a discard request into the appropriate type for the
+ * hardware. If none is registered, then discard requests are failed
+ * with %EOPNOTSUPP.
+ *
+ */
+void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
+{
+	q->prepare_discard_fn = dfn;
+}
+EXPORT_SYMBOL(blk_queue_set_discard);
+
+/**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:		queue
  * @mbfn:	merge_bvec_fn
@@ -60,6 +77,24 @@
 }
 EXPORT_SYMBOL(blk_queue_softirq_done);
 
+void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
+{
+	q->rq_timeout = timeout;
+}
+EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
+
+void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
+{
+	q->rq_timed_out_fn = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
+
+void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
+{
+	q->lld_busy_fn = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
@@ -127,7 +162,7 @@
  *    Different hardware can have different requirements as to what pages
  *    it can do I/O directly to. A low level driver can call
  *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
- *    buffers for doing I/O to pages residing above @page.
+ *    buffers for doing I/O to pages residing above @dma_addr.
  **/
 void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
 {
@@ -212,7 +247,7 @@
  * Description:
  *    Enables a low level driver to set an upper limit on the number of
  *    hw data segments in a request.  This would be the largest number of
- *    address/length pairs the host adapter can actually give as once
+ *    address/length pairs the host adapter can actually give at once
  *    to the device.
  **/
 void blk_queue_max_hw_segments(struct request_queue *q,
@@ -393,7 +428,7 @@
  * @mask:  alignment mask
  *
  * description:
- *    set required memory and length aligment for direct dma transactions.
+ *    set required memory and length alignment for direct dma transactions.
  *    this is used when buiding direct io requests for the queue.
  *
  **/
@@ -409,7 +444,7 @@
  * @mask:  alignment mask
  *
  * description:
- *    update required memory and length aligment for direct dma transactions.
+ *    update required memory and length alignment for direct dma transactions.
  *    If the requested alignment is larger than the current alignment, then
  *    the current queue alignment is updated to the new value, otherwise it
  *    is left alone.  The design of this is to allow multiple objects

diff --git a/block/blk-softirq.c b/block/blk-softirq.c
new file mode 100644
index 0000000..e660d26
--- /dev/null
+++ b/block/blk-softirq.c

@@ -0,0 +1,175 @@
+/*
+ * Functions related to softirq rq completions
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+
+#include "blk.h"
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+
+/*
+ * Softirq action handler - move entries to local list and loop over them
+ * while passing them to the queue registered handler.
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+	struct list_head *cpu_list, local_list;
+
+	local_irq_disable();
+	cpu_list = &__get_cpu_var(blk_cpu_done);
+	list_replace_init(cpu_list, &local_list);
+	local_irq_enable();
+
+	while (!list_empty(&local_list)) {
+		struct request *rq;
+
+		rq = list_entry(local_list.next, struct request, csd.list);
+		list_del_init(&rq->csd.list);
+		rq->q->softirq_done_fn(rq);
+	}
+}
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+static void trigger_softirq(void *data)
+{
+	struct request *rq = data;
+	unsigned long flags;
+	struct list_head *list;
+
+	local_irq_save(flags);
+	list = &__get_cpu_var(blk_cpu_done);
+	list_add_tail(&rq->csd.list, list);
+
+	if (list->next == &rq->csd.list)
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Set up and invoke a run of 'trigger_softirq' on the given cpu.
+ */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+	if (cpu_online(cpu)) {
+		struct call_single_data *data = &rq->csd;
+
+		data->func = trigger_softirq;
+		data->info = rq;
+		data->flags = 0;
+
+		__smp_call_function_single(cpu, data);
+		return 0;
+	}
+
+	return 1;
+}
+#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+static int raise_blk_irq(int cpu, struct request *rq)
+{
+	return 1;
+}
+#endif
+
+static int __cpuinit blk_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		int cpu = (unsigned long) hcpu;
+
+		local_irq_disable();
+		list_splice_init(&per_cpu(blk_cpu_done, cpu),
+				 &__get_cpu_var(blk_cpu_done));
+		raise_softirq_irqoff(BLOCK_SOFTIRQ);
+		local_irq_enable();
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata blk_cpu_notifier = {
+	.notifier_call	= blk_cpu_notify,
+};
+
+void __blk_complete_request(struct request *req)
+{
+	struct request_queue *q = req->q;
+	unsigned long flags;
+	int ccpu, cpu, group_cpu;
+
+	BUG_ON(!q->softirq_done_fn);
+
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	group_cpu = blk_cpu_to_group(cpu);
+
+	/*
+	 * Select completion CPU
+	 */
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+		ccpu = req->cpu;
+	else
+		ccpu = cpu;
+
+	if (ccpu == cpu || ccpu == group_cpu) {
+		struct list_head *list;
+do_local:
+		list = &__get_cpu_var(blk_cpu_done);
+		list_add_tail(&req->csd.list, list);
+
+		/*
+		 * if the list only contains our just added request,
+		 * signal a raise of the softirq. If there are already
+		 * entries there, someone already raised the irq but it
+		 * hasn't run yet.
+		 */
+		if (list->next == &req->csd.list)
+			raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	} else if (raise_blk_irq(ccpu, req))
+		goto do_local;
+
+	local_irq_restore(flags);
+}
+
+/**
+ * blk_complete_request - end I/O on a request
+ * @req:      the request being processed
+ *
+ * Description:
+ *     Ends all I/O on a request. It does not handle partial completions,
+ *     unless the driver actually implements this in its completion callback
+ *     through requeueing. The actual completion happens out-of-order,
+ *     through a softirq handler. The user must have registered a completion
+ *     callback through blk_queue_softirq_done().
+ **/
+void blk_complete_request(struct request *req)
+{
+	if (unlikely(blk_should_fake_timeout(req->q)))
+		return;
+	if (!blk_mark_rq_complete(req))
+		__blk_complete_request(req);
+}
+EXPORT_SYMBOL(blk_complete_request);
+
+__init int blk_softirq_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+
+	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
+	register_hotcpu_notifier(&blk_cpu_notifier);
+	return 0;
+}
+subsys_initcall(blk_softirq_init);

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 304ec73..21e275d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c

@@ -156,6 +156,30 @@
 	return ret;
 }
 
+static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
+{
+	unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+
+	return queue_var_show(set != 0, page);
+}
+
+static ssize_t
+queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
+{
+	ssize_t ret = -EINVAL;
+#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+	unsigned long val;
+
+	ret = queue_var_store(&val, page, count);
+	spin_lock_irq(q->queue_lock);
+	if (val)
+		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_SAME_COMP,  q);
+	spin_unlock_irq(q->queue_lock);
+#endif
+	return ret;
+}
 
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@
 	.store = queue_nomerges_store,
 };
 
+static struct queue_sysfs_entry queue_rq_affinity_entry = {
+	.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_rq_affinity_show,
+	.store = queue_rq_affinity_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -205,6 +235,7 @@
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
 	&queue_nomerges_entry.attr,
+	&queue_rq_affinity_entry.attr,
 	NULL,
 };
 
@@ -310,7 +341,7 @@
 	if (!q->request_fn)
 		return 0;
 
-	ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj),
+	ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
 			  "%s", "queue");
 	if (ret < 0)
 		return ret;
@@ -339,6 +370,6 @@
 
 		kobject_uevent(&q->kobj, KOBJ_REMOVE);
 		kobject_del(&q->kobj);
-		kobject_put(&disk->dev.kobj);
+		kobject_put(&disk_to_dev(disk)->kobj);
 	}
 }

diff --git a/block/blk-tag.c b/block/blk-tag.c
index ed5166f..c0d419e 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c

@@ -29,7 +29,7 @@
  * __blk_free_tags - release a given set of tag maintenance info
  * @bqt:	the tag map to free
  *
- * Tries to free the specified @bqt@.  Returns true if it was
+ * Tries to free the specified @bqt.  Returns true if it was
  * actually freed and false if there are still references using it
  */
 static int __blk_free_tags(struct blk_queue_tag *bqt)
@@ -78,7 +78,7 @@
  * blk_free_tags - release a given set of tag maintenance info
  * @bqt:	the tag map to free
  *
- * For externally managed @bqt@ frees the map.  Callers of this
+ * For externally managed @bqt frees the map.  Callers of this
  * function must guarantee to have released all the queues that
  * might have been using this tag map.
  */
@@ -94,7 +94,7 @@
  * @q:  the request queue for the device
  *
  *  Notes:
- *	This is used to disabled tagged queuing to a device, yet leave
+ *	This is used to disable tagged queuing to a device, yet leave
  *	queue in function.
  **/
 void blk_queue_free_tags(struct request_queue *q)
@@ -271,7 +271,7 @@
  * @rq: the request that has completed
  *
  *  Description:
- *    Typically called when end_that_request_first() returns 0, meaning
+ *    Typically called when end_that_request_first() returns %0, meaning
  *    all transfers have been done for a request. It's important to call
  *    this function before end_that_request_last(), as that will put the
  *    request back on the free list thus corrupting the internal tag list.
@@ -337,6 +337,7 @@
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
+	unsigned max_depth, offset;
 	int tag;
 
 	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -350,10 +351,19 @@
 	/*
 	 * Protect against shared tag maps, as we may not have exclusive
 	 * access to the tag map.
+	 *
+	 * We reserve a few tags just for sync IO, since we don't want
+	 * to starve sync IO on behalf of flooding async IO.
 	 */
+	max_depth = bqt->max_depth;
+	if (rq_is_sync(rq))
+		offset = 0;
+	else
+		offset = max_depth >> 2;
+
 	do {
-		tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
-		if (tag >= bqt->max_depth)
+		tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
+		if (tag >= max_depth)
 			return 1;
 
 	} while (test_and_set_bit_lock(tag, bqt->tag_map));

diff --git a/block/blk-timeout.c b/block/blk-timeout.c
new file mode 100644
index 0000000..972a63f
--- /dev/null
+++ b/block/blk-timeout.c

@@ -0,0 +1,238 @@
+/*
+ * Functions related to generic timeout handling of requests.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/fault-inject.h>
+
+#include "blk.h"
+
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+
+static DECLARE_FAULT_ATTR(fail_io_timeout);
+
+static int __init setup_fail_io_timeout(char *str)
+{
+	return setup_fault_attr(&fail_io_timeout, str);
+}
+__setup("fail_io_timeout=", setup_fail_io_timeout);
+
+int blk_should_fake_timeout(struct request_queue *q)
+{
+	if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
+		return 0;
+
+	return should_fail(&fail_io_timeout, 1);
+}
+
+static int __init fail_io_timeout_debugfs(void)
+{
+	return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
+}
+
+late_initcall(fail_io_timeout_debugfs);
+
+ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);
+
+	return sprintf(buf, "%d\n", set != 0);
+}
+
+ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	int val;
+
+	if (count) {
+		struct request_queue *q = disk->queue;
+		char *p = (char *) buf;
+
+		val = simple_strtoul(p, &p, 10);
+		spin_lock_irq(q->queue_lock);
+		if (val)
+			queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
+		else
+			queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
+		spin_unlock_irq(q->queue_lock);
+	}
+
+	return count;
+}
+
+#endif /* CONFIG_FAIL_IO_TIMEOUT */
+
+/*
+ * blk_delete_timer - Delete/cancel timer for a given request.
+ * @req:	request that we are canceling timer for
+ * Also stops the queue timer once the queue's timeout list is empty.
+ */
+void blk_delete_timer(struct request *req)
+{
+	struct request_queue *q = req->q;
+
+	/*
+	 * Nothing to detach: no timeout handler, or no deadline was set
+	 */
+	if (!q->rq_timed_out_fn || !req->deadline)
+		return;
+
+	list_del_init(&req->timeout_list);
+
+	if (list_empty(&q->timeout_list))
+		del_timer(&q->timeout);
+}
+
+static void blk_rq_timed_out(struct request *req)
+{
+	struct request_queue *q = req->q;
+	enum blk_eh_timer_return ret;
+
+	ret = q->rq_timed_out_fn(req);
+	switch (ret) {
+	case BLK_EH_HANDLED:
+		__blk_complete_request(req);
+		break;
+	case BLK_EH_RESET_TIMER:
+		blk_clear_rq_complete(req);
+		blk_add_timer(req);
+		break;
+	case BLK_EH_NOT_HANDLED:
+		/*
+		 * LLD handles this for now but in the future
+		 * we can send a request msg to abort the command
+		 * and we can move more of the generic scsi eh code to
+		 * the blk layer.
+		 */
+		break;
+	default:
+		printk(KERN_ERR "block: bad eh return: %d\n", ret);
+		break;
+	}
+}
+
+void blk_rq_timed_out_timer(unsigned long data)
+{
+	struct request_queue *q = (struct request_queue *) data;
+	unsigned long flags, uninitialized_var(next), next_set = 0;
+	struct request *rq, *tmp;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
+		if (time_after_eq(jiffies, rq->deadline)) {
+			list_del_init(&rq->timeout_list);
+
+			/*
+			 * Check if we raced with end io completion
+			 */
+			if (blk_mark_rq_complete(rq))
+				continue;
+			blk_rq_timed_out(rq);
+		}
+		if (!next_set) {
+			next = rq->deadline;
+			next_set = 1;
+		} else if (time_after(next, rq->deadline))
+			next = rq->deadline;
+	}
+
+	if (next_set && !list_empty(&q->timeout_list))
+		mod_timer(&q->timeout, round_jiffies(next));
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+/**
+ * blk_abort_request -- Request recovery for the specified command
+ * @req:	pointer to the request of interest
+ *
+ * This function requests that the block layer start recovery for the
+ * request by deleting the timer and calling the q's timeout function.
+ * LLDDs who implement their own error recovery MAY ignore the timeout
+ * event if they generated blk_abort_request. Must hold queue lock.
+ */
+void blk_abort_request(struct request *req)
+{
+	if (blk_mark_rq_complete(req))
+		return;
+	blk_delete_timer(req);
+	blk_rq_timed_out(req);
+}
+EXPORT_SYMBOL_GPL(blk_abort_request);
+
+/**
+ * blk_add_timer - Start timeout timer for a single request
+ * @req:	request that is about to start running.
+ *
+ * Notes:
+ *    Each request gets its own deadline and is put on the queue's timeout
+ *    list; the per-queue timer is (re)armed if this deadline is earlier.
+ */
+void blk_add_timer(struct request *req)
+{
+	struct request_queue *q = req->q;
+	unsigned long expiry;
+
+	if (!q->rq_timed_out_fn)
+		return;
+
+	BUG_ON(!list_empty(&req->timeout_list));
+	BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
+
+	if (req->timeout)
+		req->deadline = jiffies + req->timeout;
+	else {
+		req->deadline = jiffies + q->rq_timeout;
+		/*
+		 * Some LLDs, like scsi, peek at the timeout to prevent
+		 * a command from being retried forever.
+		 */
+		req->timeout = q->rq_timeout;
+	}
+	list_add_tail(&req->timeout_list, &q->timeout_list);
+
+	/*
+	 * If the timer isn't already pending or this timeout is earlier
+	 * than an existing one, modify the timer. Round to next nearest
+	 * second.
+	 */
+	expiry = round_jiffies(req->deadline);
+
+	/*
+	 * We use ->deadline == 0 to detect whether a timer was added or
+	 * not, so just increase to next jiffy for that specific case
+	 */
+	if (unlikely(!req->deadline))
+		req->deadline = 1;
+
+	if (!timer_pending(&q->timeout) ||
+	    time_before(expiry, q->timeout.expires))
+		mod_timer(&q->timeout, expiry);
+}
+
+/**
+ * blk_abort_queue -- Abort all requests on given queue
+ * @q:		pointer to queue
+ *
+ */
+void blk_abort_queue(struct request_queue *q)
+{
+	unsigned long flags;
+	struct request *rq, *tmp;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	elv_abort_queue(q);
+
+	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
+		blk_abort_request(rq);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+}
+EXPORT_SYMBOL_GPL(blk_abort_queue);

diff --git a/block/blk.h b/block/blk.h
index c79f30e..e5c5797 100644
--- a/block/blk.h
+++ b/block/blk.h

@@ -17,6 +17,42 @@
 
 void blk_unplug_work(struct work_struct *work);
 void blk_unplug_timeout(unsigned long data);
+void blk_rq_timed_out_timer(unsigned long data);
+void blk_delete_timer(struct request *);
+void blk_add_timer(struct request *);
+
+/*
+ * Internal atomic flags for request handling
+ */
+enum rq_atomic_flags {
+	REQ_ATOM_COMPLETE = 0,
+};
+
+/*
+ * EH timer and IO completion will both attempt to 'grab' the request, make
+ * sure that only one of them succeeds
+ */
+static inline int blk_mark_rq_complete(struct request *rq)
+{
+	return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
+}
+
+static inline void blk_clear_rq_complete(struct request *rq)
+{
+	clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
+}
+
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+int blk_should_fake_timeout(struct request_queue *);
+ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
+ssize_t part_timeout_store(struct device *, struct device_attribute *,
+				const char *, size_t);
+#else
+static inline int blk_should_fake_timeout(struct request_queue *q)
+{
+	return 0;
+}
+#endif
 
 struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
@@ -59,4 +95,16 @@
 
 #endif /* BLK_DEV_INTEGRITY */
 
+static inline int blk_cpu_to_group(int cpu)
+{
+#ifdef CONFIG_SCHED_MC
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
+	return first_cpu(per_cpu(cpu_sibling_map, cpu));
+#else
+	return cpu;
+#endif
+}
+
 #endif

diff --git a/block/blktrace.c b/block/blktrace.c
index eb9651c..85049a7 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c

@@ -111,23 +111,9 @@
  */
 static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
 
-/*
- * Bio action bits of interest
- */
-static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
-
-/*
- * More could be added as needed, taking care to increment the decrementer
- * to get correct indexing
- */
-#define trace_barrier_bit(rw)	\
-	(((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
-#define trace_sync_bit(rw)	\
-	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
-#define trace_ahead_bit(rw)	\
-	(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
-#define trace_meta_bit(rw)	\
-	(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
+/* Shift the BIO_RW_x bit of rw to its BLK_TC_x slot; ilog2() is constant */
+#define MASK_TC_BIT(rw, __name) ( ((rw) & (1 << BIO_RW_ ## __name)) << \
+	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
 
 /*
  * The worker for the various blk_add_trace*() types. Fills out a
@@ -147,10 +133,11 @@
 		return;
 
 	what |= ddir_act[rw & WRITE];
-	what |= bio_act[trace_barrier_bit(rw)];
-	what |= bio_act[trace_sync_bit(rw)];
-	what |= bio_act[trace_ahead_bit(rw)];
-	what |= bio_act[trace_meta_bit(rw)];
+	what |= MASK_TC_BIT(rw, BARRIER);
+	what |= MASK_TC_BIT(rw, SYNC);
+	what |= MASK_TC_BIT(rw, AHEAD);
+	what |= MASK_TC_BIT(rw, META);
+	what |= MASK_TC_BIT(rw, DISCARD);
 
 	pid = tsk->pid;
 	if (unlikely(act_log_check(bt, what, sector, pid)))
@@ -382,7 +369,8 @@
 	if (!buts->buf_size || !buts->buf_nr)
 		return -EINVAL;
 
-	strcpy(buts->name, name);
+	strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
+	buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
 
 	/*
 	 * some device names have larger paths - convert the slashes

diff --git a/block/bsg.c b/block/bsg.c
index 0aae8d7..56cb343 100644
--- a/block/bsg.c
+++ b/block/bsg.c

@@ -283,7 +283,8 @@
 		next_rq->cmd_type = rq->cmd_type;
 
 		dxferp = (void*)(unsigned long)hdr->din_xferp;
-		ret =  blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len);
+		ret =  blk_rq_map_user(q, next_rq, NULL, dxferp,
+				       hdr->din_xfer_len, GFP_KERNEL);
 		if (ret)
 			goto out;
 	}
@@ -298,7 +299,8 @@
 		dxfer_len = 0;
 
 	if (dxfer_len) {
-		ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
+		ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
+				      GFP_KERNEL);
 		if (ret)
 			goto out;
 	}

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1e2aff8..6a062ee 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c

@@ -39,6 +39,7 @@
 #define CFQ_MIN_TT		(2)
 
 #define CFQ_SLICE_SCALE		(5)
+#define CFQ_HW_QUEUE_MIN	(5)
 
 #define RQ_CIC(rq)		\
 	((struct cfq_io_context *) (rq)->elevator_private)
@@ -86,7 +87,14 @@
 
 	int rq_in_driver;
 	int sync_flight;
+
+	/*
+	 * queue-depth detection
+	 */
+	int rq_queued;
 	int hw_tag;
+	int hw_tag_samples;
+	int rq_in_driver_peak;
 
 	/*
 	 * idle window management
@@ -244,7 +252,7 @@
 {
 	if (cfqd->busy_queues) {
 		cfq_log(cfqd, "schedule dispatch");
-		kblockd_schedule_work(&cfqd->unplug_work);
+		kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
 	}
 }
 
@@ -654,15 +662,6 @@
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
 						cfqd->rq_in_driver);
 
-	/*
-	 * If the depth is larger 1, it really could be queueing. But lets
-	 * make the mark a little higher - idling could still be good for
-	 * low queueing, and a low queueing number could also just indicate
-	 * a SCSI mid layer like behaviour where limit+1 is often seen.
-	 */
-	if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
-		cfqd->hw_tag = 1;
-
 	cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
 }
 
@@ -686,6 +685,7 @@
 	list_del_init(&rq->queuelist);
 	cfq_del_rq_rb(rq);
 
+	cfqq->cfqd->rq_queued--;
 	if (rq_is_meta(rq)) {
 		WARN_ON(!cfqq->meta_pending);
 		cfqq->meta_pending--;
@@ -878,6 +878,14 @@
 	struct cfq_io_context *cic;
 	unsigned long sl;
 
+	/*
+	 * SSD device without seek penalty, disable idling. But only do so
+	 * for devices that support queuing, otherwise we still have a problem
+	 * with sync vs async workloads.
+	 */
+	if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
+		return;
+
 	WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
 	WARN_ON(cfq_cfqq_slice_new(cfqq));
 
@@ -1833,6 +1841,7 @@
 {
 	struct cfq_io_context *cic = RQ_CIC(rq);
 
+	cfqd->rq_queued++;
 	if (rq_is_meta(rq))
 		cfqq->meta_pending++;
 
@@ -1880,6 +1889,31 @@
 	cfq_rq_enqueued(cfqd, cfqq, rq);
 }
 
+/*
+ * Update hw_tag based on peak queue depth over 50 samples under
+ * sufficient load.
+ */
+static void cfq_update_hw_tag(struct cfq_data *cfqd)
+{
+	if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
+		cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
+
+	if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
+	    cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
+		return;
+
+	if (cfqd->hw_tag_samples++ < 50)
+		return;
+
+	if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN)
+		cfqd->hw_tag = 1;
+	else
+		cfqd->hw_tag = 0;
+
+	cfqd->hw_tag_samples = 0;
+	cfqd->rq_in_driver_peak = 0;
+}
+
 static void cfq_completed_request(struct request_queue *q, struct request *rq)
 {
 	struct cfq_queue *cfqq = RQ_CFQQ(rq);
@@ -1890,6 +1924,8 @@
 	now = jiffies;
 	cfq_log_cfqq(cfqd, cfqq, "complete");
 
+	cfq_update_hw_tag(cfqd);
+
 	WARN_ON(!cfqd->rq_in_driver);
 	WARN_ON(!cfqq->dispatched);
 	cfqd->rq_in_driver--;
@@ -2200,6 +2236,7 @@
 	cfqd->cfq_slice[1] = cfq_slice_sync;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->hw_tag = 1;
 
 	return cfqd;
 }

diff --git a/block/cmd-filter.c b/block/cmd-filter.c
index 79c1499..e669aed 100644
--- a/block/cmd-filter.c
+++ b/block/cmd-filter.c

@@ -211,14 +211,10 @@
 {
 	int ret;
 	struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-	struct kobject *parent = kobject_get(disk->holder_dir->parent);
 
-	if (!parent)
-		return -ENODEV;
-
-	ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
+	ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
+				   &disk_to_dev(disk)->kobj,
 				   "%s", "cmd_filter");
-
 	if (ret < 0)
 		return ret;
 
@@ -231,7 +227,6 @@
 	struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
 
 	kobject_put(&filter->kobj);
-	kobject_put(disk->holder_dir->parent);
 }
 EXPORT_SYMBOL(blk_unregister_filter);
 #endif

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index c23177e..1e559fb 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c

@@ -788,6 +788,7 @@
 		return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
 	case BLKFLSBUF:
 	case BLKROSET:
+	case BLKDISCARD:
 	/*
 	 * the ones below are implemented in blkdev_locked_ioctl,
 	 * but we call blkdev_ioctl, which gets the lock for us

diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 342448c..fd31117 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c

@@ -33,7 +33,7 @@
 	 */
 	struct rb_root sort_list[2];	
 	struct list_head fifo_list[2];
-	
+
 	/*
 	 * next in sort order. read, write or both are NULL
 	 */
@@ -53,7 +53,11 @@
 
 static void deadline_move_request(struct deadline_data *, struct request *);
 
-#define RQ_RB_ROOT(dd, rq)	(&(dd)->sort_list[rq_data_dir((rq))])
+static inline struct rb_root *
+deadline_rb_root(struct deadline_data *dd, struct request *rq)
+{
+	return &dd->sort_list[rq_data_dir(rq)];
+}
 
 /*
  * get the request after `rq' in sector-sorted order
@@ -72,15 +76,11 @@
 static void
 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 {
-	struct rb_root *root = RQ_RB_ROOT(dd, rq);
+	struct rb_root *root = deadline_rb_root(dd, rq);
 	struct request *__alias;
 
-retry:
-	__alias = elv_rb_add(root, rq);
-	if (unlikely(__alias)) {
+	while (unlikely(__alias = elv_rb_add(root, rq)))
 		deadline_move_request(dd, __alias);
-		goto retry;
-	}
 }
 
 static inline void
@@ -91,7 +91,7 @@
 	if (dd->next_rq[data_dir] == rq)
 		dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-	elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
+	elv_rb_del(deadline_rb_root(dd, rq), rq);
 }
 
 /*
@@ -106,7 +106,7 @@
 	deadline_add_rq_rb(dd, rq);
 
 	/*
-	 * set expire time (only used for reads) and add to fifo list
+	 * set expire time and add to fifo list
 	 */
 	rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
 	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
@@ -162,7 +162,7 @@
 	 * if the merge was a front merge, we need to reposition request
 	 */
 	if (type == ELEVATOR_FRONT_MERGE) {
-		elv_rb_del(RQ_RB_ROOT(dd, req), req);
+		elv_rb_del(deadline_rb_root(dd, req), req);
 		deadline_add_rq_rb(dd, req);
 	}
 }
@@ -212,7 +212,7 @@
 	dd->next_rq[WRITE] = NULL;
 	dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-	dd->last_sector = rq->sector + rq->nr_sectors;
+	dd->last_sector = rq_end_sector(rq);
 
 	/*
 	 * take it off the sort and fifo list, move
@@ -222,7 +222,7 @@
 }
 
 /*
- * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
+ * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
  * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
  */
 static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
@@ -258,17 +258,9 @@
 	else
 		rq = dd->next_rq[READ];
 
-	if (rq) {
-		/* we have a "next request" */
-		
-		if (dd->last_sector != rq->sector)
-			/* end the batch on a non sequential request */
-			dd->batching += dd->fifo_batch;
-		
-		if (dd->batching < dd->fifo_batch)
-			/* we are still entitled to batch */
-			goto dispatch_request;
-	}
+	if (rq && dd->batching < dd->fifo_batch)
+		/* we have a next request and are still entitled to batch */
+		goto dispatch_request;
 
 	/*
 	 * at this point we are not running a batch. select the appropriate

diff --git a/block/elevator.c b/block/elevator.c
index ed6f8f3..0451892 100644
--- a/block/elevator.c
+++ b/block/elevator.c

@@ -34,8 +34,9 @@
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
 #include <linux/hash.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
+#include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
@@ -75,6 +76,12 @@
 		return 0;
 
 	/*
+	 * Don't merge file system requests and discard requests
+	 */
+	if (bio_discard(bio) != bio_discard(rq->bio))
+		return 0;
+
+	/*
 	 * different data direction or already started, don't merge
 	 */
 	if (bio_data_dir(bio) != rq_data_dir(rq))
@@ -438,6 +445,8 @@
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
+		if (blk_discard_rq(rq) != blk_discard_rq(pos))
+			break;
 		if (rq_data_dir(rq) != rq_data_dir(pos))
 			break;
 		if (pos->cmd_flags & stop_flags)
@@ -607,7 +616,7 @@
 		break;
 
 	case ELEVATOR_INSERT_SORT:
-		BUG_ON(!blk_fs_request(rq));
+		BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
 		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
 		if (rq_mergeable(rq)) {
@@ -692,7 +701,7 @@
 		 * this request is scheduling boundary, update
 		 * end_sector
 		 */
-		if (blk_fs_request(rq)) {
+		if (blk_fs_request(rq) || blk_discard_rq(rq)) {
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
@@ -745,7 +754,7 @@
 		 * not ever see it.
 		 */
 		if (blk_empty_barrier(rq)) {
-			end_queued_request(rq, 1);
+			__blk_end_request(rq, 0, blk_rq_bytes(rq));
 			continue;
 		}
 		if (!(rq->cmd_flags & REQ_STARTED)) {
@@ -764,6 +773,12 @@
 			 */
 			rq->cmd_flags |= REQ_STARTED;
 			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+
+			/*
+			 * We are now handing the request to the hardware,
+			 * add the timeout handler
+			 */
+			blk_add_timer(rq);
 		}
 
 		if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -782,7 +797,6 @@
 			 * device can handle
 			 */
 			rq->nr_phys_segments++;
-			rq->nr_hw_segments++;
 		}
 
 		if (!q->prep_rq_fn)
@@ -805,14 +819,13 @@
 				 * so that we don't add it again
 				 */
 				--rq->nr_phys_segments;
-				--rq->nr_hw_segments;
 			}
 
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL) {
 			rq->cmd_flags |= REQ_QUIET;
-			end_queued_request(rq, 0);
+			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
 			break;
@@ -901,6 +914,19 @@
 	return ELV_MQUEUE_MAY;
 }
 
+void elv_abort_queue(struct request_queue *q)
+{
+	struct request *rq;
+
+	while (!list_empty(&q->queue_head)) {
+		rq = list_entry_rq(q->queue_head.next);
+		rq->cmd_flags |= REQ_QUIET;
+		blk_add_trace_rq(q, rq, BLK_TA_ABORT);
+		__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
+	}
+}
+EXPORT_SYMBOL(elv_abort_queue);
+
 void elv_completed_request(struct request_queue *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;

diff --git a/block/genhd.c b/block/genhd.c
index e0ce23a..4cd3433 100644
--- a/block/genhd.c
+++ b/block/genhd.c

@@ -16,6 +16,7 @@
 #include <linux/kobj_map.h>
 #include <linux/buffer_head.h>
 #include <linux/mutex.h>
+#include <linux/idr.h>
 
 #include "blk.h"
 
@@ -24,8 +25,194 @@
 struct kobject *block_depr;
 #endif
 
+/* for extended dynamic devt allocation, currently only one major is used */
+#define MAX_EXT_DEVT		(1 << MINORBITS)
+
+/* For extended devt allocation.  ext_devt_mutex prevents look up
+ * results from going away underneath its user.
+ */
+static DEFINE_MUTEX(ext_devt_mutex);
+static DEFINE_IDR(ext_devt_idr);
+
 static struct device_type disk_type;
 
+/**
+ * disk_get_part - get partition
+ * @disk: disk to look partition from
+ * @partno: partition number
+ *
+ * Look for partition @partno from @disk.  If found, increment
+ * reference count and return it.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * Pointer to the found partition on success, NULL if not found.
+ */
+struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
+{
+	struct hd_struct *part = NULL;
+	struct disk_part_tbl *ptbl;
+
+	if (unlikely(partno < 0))
+		return NULL;
+
+	rcu_read_lock();
+
+	ptbl = rcu_dereference(disk->part_tbl);
+	if (likely(partno < ptbl->len)) {
+		part = rcu_dereference(ptbl->part[partno]);
+		if (part)
+			get_device(part_to_dev(part));
+	}
+
+	rcu_read_unlock();
+
+	return part;
+}
+EXPORT_SYMBOL_GPL(disk_get_part);
+
+/**
+ * disk_part_iter_init - initialize partition iterator
+ * @piter: iterator to initialize
+ * @disk: disk to iterate over
+ * @flags: DISK_PITER_* flags
+ *
+ * Initialize @piter so that it iterates over partitions of @disk.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
+			  unsigned int flags)
+{
+	struct disk_part_tbl *ptbl;
+
+	rcu_read_lock();
+	ptbl = rcu_dereference(disk->part_tbl);
+
+	piter->disk = disk;
+	piter->part = NULL;
+
+	if (flags & DISK_PITER_REVERSE)
+		piter->idx = ptbl->len - 1;
+	else if (flags & DISK_PITER_INCL_PART0)
+		piter->idx = 0;
+	else
+		piter->idx = 1;
+
+	piter->flags = flags;
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_init);
+
+/**
+ * disk_part_iter_next - proceed iterator to the next partition and return it
+ * @piter: iterator of interest
+ *
+ * Proceed @piter to the next partition and return it.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
+{
+	struct disk_part_tbl *ptbl;
+	int inc, end;
+
+	/* put the last partition */
+	disk_put_part(piter->part);
+	piter->part = NULL;
+
+	/* get part_tbl */
+	rcu_read_lock();
+	ptbl = rcu_dereference(piter->disk->part_tbl);
+
+	/* determine iteration parameters */
+	if (piter->flags & DISK_PITER_REVERSE) {
+		inc = -1;
+		if (piter->flags & DISK_PITER_INCL_PART0)
+			end = -1;
+		else
+			end = 0;
+	} else {
+		inc = 1;
+		end = ptbl->len;
+	}
+
+	/* iterate to the next partition */
+	for (; piter->idx != end; piter->idx += inc) {
+		struct hd_struct *part;
+
+		part = rcu_dereference(ptbl->part[piter->idx]);
+		if (!part)
+			continue;
+		if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
+			continue;
+
+		get_device(part_to_dev(part));
+		piter->part = part;
+		piter->idx += inc;
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return piter->part;
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_next);
+
+/**
+ * disk_part_iter_exit - finish up partition iteration
+ * @piter: iter of interest
+ *
+ * Called when iteration is over.  Cleans up @piter.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void disk_part_iter_exit(struct disk_part_iter *piter)
+{
+	disk_put_part(piter->part);
+	piter->part = NULL;
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_exit);
+
+/**
+ * disk_map_sector_rcu - map sector to partition
+ * @disk: gendisk of interest
+ * @sector: sector to map
+ *
+ * Find out which partition @sector maps to on @disk.  This is
+ * primarily used for stats accounting.
+ *
+ * CONTEXT:
+ * RCU read locked.  The returned partition pointer is valid only
+ * while preemption is disabled.
+ *
+ * RETURNS:
+ * Found partition on success, part0 is returned if no partition matches
+ */
+struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
+{
+	struct disk_part_tbl *ptbl;
+	int i;
+
+	ptbl = rcu_dereference(disk->part_tbl);
+
+	for (i = 1; i < ptbl->len; i++) {
+		struct hd_struct *part = rcu_dereference(ptbl->part[i]);
+
+		if (part && part->start_sect <= sector &&
+		    sector < part->start_sect + part->nr_sects)
+			return part;
+	}
+	return &disk->part0;
+}
+EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
+
 /*
  * Can be deleted altogether. Later.
  *
@@ -43,14 +230,14 @@
 }
 
 #ifdef CONFIG_PROC_FS
-void blkdev_show(struct seq_file *f, off_t offset)
+void blkdev_show(struct seq_file *seqf, off_t offset)
 {
 	struct blk_major_name *dp;
 
 	if (offset < BLKDEV_MAJOR_HASH_SIZE) {
 		mutex_lock(&block_class_lock);
 		for (dp = major_names[offset]; dp; dp = dp->next)
-			seq_printf(f, "%3d %s\n", dp->major, dp->name);
+			seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
 		mutex_unlock(&block_class_lock);
 	}
 }
@@ -136,6 +323,118 @@
 
 static struct kobj_map *bdev_map;
 
+/**
+ * blk_mangle_minor - scatter minor numbers apart
+ * @minor: minor number to mangle
+ *
+ * Scatter consecutive @minor numbers apart if CONFIG_DEBUG_BLOCK_EXT_DEVT
+ * is enabled.  Mangling twice gives the original value.
+ *
+ * RETURNS:
+ * Mangled value.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+static int blk_mangle_minor(int minor)
+{
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+	int i;
+
+	for (i = 0; i < MINORBITS / 2; i++) {
+		int low = minor & (1 << i);
+		int high = minor & (1 << (MINORBITS - 1 - i));
+		int distance = MINORBITS - 1 - 2 * i;
+
+		minor ^= low | high;	/* clear both bits */
+		low <<= distance;	/* swap the positions */
+		high >>= distance;
+		minor |= low | high;	/* and set */
+	}
+#endif
+	return minor;
+}
+
+/**
+ * blk_alloc_devt - allocate a dev_t for a partition
+ * @part: partition to allocate dev_t for
+ * @devt: out parameter for resulting dev_t
+ *
+ * Allocate a dev_t for block device.  Memory needed for an extended
+ * devt is allocated with GFP_KERNEL.
+ *
+ * RETURNS:
+ * 0 on success, allocated dev_t is returned in *@devt.  -errno on
+ * failure.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
+{
+	struct gendisk *disk = part_to_disk(part);
+	int idx, rc;
+
+	/* in consecutive minor range? */
+	if (part->partno < disk->minors) {
+		*devt = MKDEV(disk->major, disk->first_minor + part->partno);
+		return 0;
+	}
+
+	/* allocate ext devt; idx must stay below MAX_EXT_DEVT to fit a minor */
+	do {
+		if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
+			return -ENOMEM;
+		rc = idr_get_new(&ext_devt_idr, part, &idx);
+	} while (rc == -EAGAIN);
+
+	if (rc)
+		return rc;
+
+	if (idx >= MAX_EXT_DEVT) {
+		idr_remove(&ext_devt_idr, idx);
+		return -EBUSY;
+	}
+
+	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
+	return 0;
+}
+
+/**
+ * blk_free_devt - free a dev_t
+ * @devt: dev_t to free
+ *
+ * Free @devt which was allocated using blk_alloc_devt().
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void blk_free_devt(dev_t devt)
+{
+	might_sleep();
+
+	if (devt == MKDEV(0, 0))
+		return;
+
+	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
+		mutex_lock(&ext_devt_mutex);
+		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
+		mutex_unlock(&ext_devt_mutex);
+	}
+}
+
+static char *bdevt_str(dev_t devt, char *buf)
+{
+	if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
+		char tbuf[BDEVT_SIZE];
+		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
+		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
+	} else
+		snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
+
+	return buf;
+}
+
 /*
  * Register device numbers dev..(dev+range-1)
  * range must be nonzero
@@ -157,11 +456,11 @@
 
 EXPORT_SYMBOL(blk_unregister_region);
 
-static struct kobject *exact_match(dev_t devt, int *part, void *data)
+static struct kobject *exact_match(dev_t devt, int *partno, void *data)
 {
 	struct gendisk *p = data;
 
-	return &p->dev.kobj;
+	return &disk_to_dev(p)->kobj;
 }
 
 static int exact_lock(dev_t devt, void *data)
@@ -179,21 +478,46 @@
  *
  * This function registers the partitioning information in @disk
  * with the kernel.
+ *
+ * FIXME: error handling
  */
 void add_disk(struct gendisk *disk)
 {
 	struct backing_dev_info *bdi;
+	dev_t devt;
 	int retval;
 
+	/* minors == 0 indicates to use ext devt from part0 and should
+	 * be accompanied with EXT_DEVT flag.  Make sure all
+	 * parameters make sense.
+	 */
+	WARN_ON(disk->minors && !(disk->major || disk->first_minor));
+	WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
+
 	disk->flags |= GENHD_FL_UP;
-	blk_register_region(MKDEV(disk->major, disk->first_minor),
-			    disk->minors, NULL, exact_match, exact_lock, disk);
+
+	retval = blk_alloc_devt(&disk->part0, &devt);
+	if (retval) {
+		WARN_ON(1);
+		return;
+	}
+	disk_to_dev(disk)->devt = devt;
+
+	/* ->major and ->first_minor aren't supposed to be
+	 * dereferenced from here on, but set them just in case.
+	 */
+	disk->major = MAJOR(devt);
+	disk->first_minor = MINOR(devt);
+
+	blk_register_region(disk_devt(disk), disk->minors, NULL,
+			    exact_match, exact_lock, disk);
 	register_disk(disk);
 	blk_register_queue(disk);
 
 	bdi = &disk->queue->backing_dev_info;
-	bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
-	retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi");
+	bdi_register_dev(bdi, disk_devt(disk));
+	retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
+				   "bdi");
 	WARN_ON(retval);
 }
 
@@ -202,79 +526,72 @@
 
 void unlink_gendisk(struct gendisk *disk)
 {
-	sysfs_remove_link(&disk->dev.kobj, "bdi");
+	sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
 	bdi_unregister(&disk->queue->backing_dev_info);
 	blk_unregister_queue(disk);
-	blk_unregister_region(MKDEV(disk->major, disk->first_minor),
-			      disk->minors);
+	blk_unregister_region(disk_devt(disk), disk->minors);
 }
 
 /**
  * get_gendisk - get partitioning information for a given device
- * @dev: device to get partitioning information for
+ * @devt: device to get partitioning information for
+ * @partno: returned partition index
  *
  * This function gets the structure containing partitioning
- * information for the given device @dev.
+ * information for the given device @devt.
  */
-struct gendisk *get_gendisk(dev_t devt, int *part)
+struct gendisk *get_gendisk(dev_t devt, int *partno)
 {
-	struct kobject *kobj = kobj_lookup(bdev_map, devt, part);
-	struct device *dev = kobj_to_dev(kobj);
+	struct gendisk *disk = NULL;
 
-	return  kobj ? dev_to_disk(dev) : NULL;
-}
+	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
+		struct kobject *kobj;
 
-/*
- * print a partitions - intended for places where the root filesystem can't be
- * mounted and thus to give the victim some idea of what went wrong
- */
-static int printk_partition(struct device *dev, void *data)
-{
-	struct gendisk *sgp;
-	char buf[BDEVNAME_SIZE];
-	int n;
+		kobj = kobj_lookup(bdev_map, devt, partno);
+		if (kobj)
+			disk = dev_to_disk(kobj_to_dev(kobj));
+	} else {
+		struct hd_struct *part;
 
-	if (dev->type != &disk_type)
-		goto exit;
-
-	sgp = dev_to_disk(dev);
-	/*
-	 * Don't show empty devices or things that have been surpressed
-	 */
-	if (get_capacity(sgp) == 0 ||
-	    (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
-		goto exit;
-
-	/*
-	 * Note, unlike /proc/partitions, I am showing the numbers in
-	 * hex - the same format as the root= option takes.
-	 */
-	printk("%02x%02x %10llu %s",
-		sgp->major, sgp->first_minor,
-		(unsigned long long)get_capacity(sgp) >> 1,
-		disk_name(sgp, 0, buf));
-	if (sgp->driverfs_dev != NULL &&
-	    sgp->driverfs_dev->driver != NULL)
-		printk(" driver: %s\n",
-			sgp->driverfs_dev->driver->name);
-	else
-		printk(" (driver?)\n");
-
-	/* now show the partitions */
-	for (n = 0; n < sgp->minors - 1; ++n) {
-		if (sgp->part[n] == NULL)
-			goto exit;
-		if (sgp->part[n]->nr_sects == 0)
-			goto exit;
-		printk("  %02x%02x %10llu %s\n",
-			sgp->major, n + 1 + sgp->first_minor,
-			(unsigned long long)sgp->part[n]->nr_sects >> 1,
-			disk_name(sgp, n + 1, buf));
+		mutex_lock(&ext_devt_mutex);
+		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
+		if (part && get_disk(part_to_disk(part))) {
+			*partno = part->partno;
+			disk = part_to_disk(part);
+		}
+		mutex_unlock(&ext_devt_mutex);
 	}
-exit:
-	return 0;
+
+	return disk;
 }
 
+/**
+ * bdget_disk - do bdget() by gendisk and partition number
+ * @disk: gendisk of interest
+ * @partno: partition number
+ *
+ * Find partition @partno from @disk, do bdget() on it.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * Resulting block_device on success, NULL on failure.
+ */
+struct block_device *bdget_disk(struct gendisk *disk, int partno)
+{
+	struct hd_struct *part;
+	struct block_device *bdev = NULL;
+
+	part = disk_get_part(disk, partno);
+	if (part)
+		bdev = bdget(part_devt(part));
+	disk_put_part(part);
+
+	return bdev;
+}
+EXPORT_SYMBOL(bdget_disk);
+
 /*
  * print a full list of all partitions - intended for places where the root
  * filesystem can't be mounted and thus to give the victim some idea of what
@@ -282,120 +599,145 @@
  */
 void __init printk_all_partitions(void)
 {
-	mutex_lock(&block_class_lock);
-	class_for_each_device(&block_class, NULL, NULL, printk_partition);
-	mutex_unlock(&block_class_lock);
+	struct class_dev_iter iter;
+	struct device *dev;
+
+	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+	while ((dev = class_dev_iter_next(&iter))) {
+		struct gendisk *disk = dev_to_disk(dev);
+		struct disk_part_iter piter;
+		struct hd_struct *part;
+		char name_buf[BDEVNAME_SIZE];
+		char devt_buf[BDEVT_SIZE];
+
+		/*
+		 * Don't show empty devices or things that have been
+		 * suppressed
+		 */
+		if (get_capacity(disk) == 0 ||
+		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
+			continue;
+
+		/*
+		 * Note, unlike /proc/partitions, I am showing the
+		 * numbers in hex - the same format as the root=
+		 * option takes.
+		 */
+		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+		while ((part = disk_part_iter_next(&piter))) {
+			bool is_part0 = part == &disk->part0;
+
+			printk("%s%s %10llu %s", is_part0 ? "" : "  ",
+			       bdevt_str(part_devt(part), devt_buf),
+			       (unsigned long long)part->nr_sects >> 1,
+			       disk_name(disk, part->partno, name_buf));
+			if (is_part0) {
+				if (disk->driverfs_dev != NULL &&
+				    disk->driverfs_dev->driver != NULL)
+					printk(" driver: %s\n",
+					      disk->driverfs_dev->driver->name);
+				else
+					printk(" (driver?)\n");
+			} else
+				printk("\n");
+		}
+		disk_part_iter_exit(&piter);
+	}
+	class_dev_iter_exit(&iter);
 }
 
 #ifdef CONFIG_PROC_FS
 /* iterator */
-static int find_start(struct device *dev, void *data)
+static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
 {
-	loff_t *k = data;
+	loff_t skip = *pos;
+	struct class_dev_iter *iter;
+	struct device *dev;
 
-	if (dev->type != &disk_type)
-		return 0;
-	if (!*k)
-		return 1;
-	(*k)--;
-	return 0;
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return ERR_PTR(-ENOMEM);
+
+	seqf->private = iter;
+	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
+	do {
+		dev = class_dev_iter_next(iter);
+		if (!dev)
+			return NULL;
+	} while (skip--);
+
+	return dev_to_disk(dev);
 }
 
-static void *part_start(struct seq_file *part, loff_t *pos)
+static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
 {
 	struct device *dev;
-	loff_t k = *pos;
 
-	if (!k)
-		part->private = (void *)1LU;	/* tell show to print header */
-
-	mutex_lock(&block_class_lock);
-	dev = class_find_device(&block_class, NULL, &k, find_start);
-	if (dev) {
-		put_device(dev);
+	(*pos)++;
+	dev = class_dev_iter_next(seqf->private);
+	if (dev)
 		return dev_to_disk(dev);
-	}
+
 	return NULL;
 }
 
-static int find_next(struct device *dev, void *data)
+static void disk_seqf_stop(struct seq_file *seqf, void *v)
 {
-	if (dev->type == &disk_type)
-		return 1;
-	return 0;
-}
+	struct class_dev_iter *iter = seqf->private;
 
-static void *part_next(struct seq_file *part, void *v, loff_t *pos)
-{
-	struct gendisk *gp = v;
-	struct device *dev;
-	++*pos;
-	dev = class_find_device(&block_class, &gp->dev, NULL, find_next);
-	if (dev) {
-		put_device(dev);
-		return dev_to_disk(dev);
+	/* stop is called even after start failed :-( */
+	if (iter) {
+		class_dev_iter_exit(iter);
+		kfree(iter);
 	}
-	return NULL;
 }
 
-static void part_stop(struct seq_file *part, void *v)
+static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
 {
-	mutex_unlock(&block_class_lock);
+	static void *p;
+
+	p = disk_seqf_start(seqf, pos);
+	if (!IS_ERR(p) && p && !*pos)
+		seq_puts(seqf, "major minor  #blocks  name\n\n");
+	return p;
 }
 
-static int show_partition(struct seq_file *part, void *v)
+static int show_partition(struct seq_file *seqf, void *v)
 {
 	struct gendisk *sgp = v;
-	int n;
+	struct disk_part_iter piter;
+	struct hd_struct *part;
 	char buf[BDEVNAME_SIZE];
 
-	/*
-	 * Print header if start told us to do.  This is to preserve
-	 * the original behavior of not printing header if no
-	 * partition exists.  This hackery will be removed later with
-	 * class iteration clean up.
-	 */
-	if (part->private) {
-		seq_puts(part, "major minor  #blocks  name\n\n");
-		part->private = NULL;
-	}
-
 	/* Don't show non-partitionable removeable devices or empty devices */
-	if (!get_capacity(sgp) ||
-			(sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE)))
+	if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+				   (sgp->flags & GENHD_FL_REMOVABLE)))
 		return 0;
 	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
 		return 0;
 
 	/* show the full disk and all non-0 size partitions of it */
-	seq_printf(part, "%4d  %4d %10llu %s\n",
-		sgp->major, sgp->first_minor,
-		(unsigned long long)get_capacity(sgp) >> 1,
-		disk_name(sgp, 0, buf));
-	for (n = 0; n < sgp->minors - 1; n++) {
-		if (!sgp->part[n])
-			continue;
-		if (sgp->part[n]->nr_sects == 0)
-			continue;
-		seq_printf(part, "%4d  %4d %10llu %s\n",
-			sgp->major, n + 1 + sgp->first_minor,
-			(unsigned long long)sgp->part[n]->nr_sects >> 1 ,
-			disk_name(sgp, n + 1, buf));
-	}
+	disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
+	while ((part = disk_part_iter_next(&piter)))
+		seq_printf(seqf, "%4d  %7d %10llu %s\n",
+			   MAJOR(part_devt(part)), MINOR(part_devt(part)),
+			   (unsigned long long)part->nr_sects >> 1,
+			   disk_name(sgp, part->partno, buf));
+	disk_part_iter_exit(&piter);
 
 	return 0;
 }
 
 const struct seq_operations partitions_op = {
-	.start	= part_start,
-	.next	= part_next,
-	.stop	= part_stop,
+	.start	= show_partition_start,
+	.next	= disk_seqf_next,
+	.stop	= disk_seqf_stop,
 	.show	= show_partition
 };
 #endif
 
 
-static struct kobject *base_probe(dev_t devt, int *part, void *data)
+static struct kobject *base_probe(dev_t devt, int *partno, void *data)
 {
 	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
 		/* Make old-style 2.4 aliases work */
@@ -431,6 +773,14 @@
 	return sprintf(buf, "%d\n", disk->minors);
 }
 
+static ssize_t disk_ext_range_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", disk_max_parts(disk));
+}
+
 static ssize_t disk_removable_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
@@ -445,15 +795,7 @@
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
-	return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
-}
-
-static ssize_t disk_size_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-
-	return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk));
+	return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 }
 
 static ssize_t disk_capability_show(struct device *dev,
@@ -464,73 +806,26 @@
 	return sprintf(buf, "%x\n", disk->flags);
 }
 
-static ssize_t disk_stat_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-
-	preempt_disable();
-	disk_round_stats(disk);
-	preempt_enable();
-	return sprintf(buf,
-		"%8lu %8lu %8llu %8u "
-		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
-		"\n",
-		disk_stat_read(disk, ios[READ]),
-		disk_stat_read(disk, merges[READ]),
-		(unsigned long long)disk_stat_read(disk, sectors[READ]),
-		jiffies_to_msecs(disk_stat_read(disk, ticks[READ])),
-		disk_stat_read(disk, ios[WRITE]),
-		disk_stat_read(disk, merges[WRITE]),
-		(unsigned long long)disk_stat_read(disk, sectors[WRITE]),
-		jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])),
-		disk->in_flight,
-		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
-		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
-}
-
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-static ssize_t disk_fail_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-
-	return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0);
-}
-
-static ssize_t disk_fail_store(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t count)
-{
-	struct gendisk *disk = dev_to_disk(dev);
-	int i;
-
-	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
-		if (i == 0)
-			disk->flags &= ~GENHD_FL_FAIL;
-		else
-			disk->flags |= GENHD_FL_FAIL;
-	}
-
-	return count;
-}
-
-#endif
-
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
+static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
+static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
-static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
+static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
-	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store);
+	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+#endif
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+static struct device_attribute dev_attr_fail_timeout =
+	__ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
+		part_timeout_store);
 #endif
 
 static struct attribute *disk_attrs[] = {
 	&dev_attr_range.attr,
+	&dev_attr_ext_range.attr,
 	&dev_attr_removable.attr,
 	&dev_attr_ro.attr,
 	&dev_attr_size.attr,
@@ -539,6 +834,9 @@
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+	&dev_attr_fail_timeout.attr,
+#endif
 	NULL
 };
 
@@ -551,13 +849,87 @@
 	NULL
 };
 
+static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
+{
+	struct disk_part_tbl *ptbl =
+		container_of(head, struct disk_part_tbl, rcu_head);
+
+	kfree(ptbl);
+}
+
+/**
+ * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
+ * @disk: disk to replace part_tbl for
+ * @new_ptbl: new part_tbl to install
+ *
+ * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
+ * original ptbl is freed using RCU callback.
+ *
+ * LOCKING:
+ * Matching bd_mutex locked.
+ */
+static void disk_replace_part_tbl(struct gendisk *disk,
+				  struct disk_part_tbl *new_ptbl)
+{
+	struct disk_part_tbl *old_ptbl = disk->part_tbl;
+
+	rcu_assign_pointer(disk->part_tbl, new_ptbl);
+	if (old_ptbl)
+		call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+}
+
+/**
+ * disk_expand_part_tbl - expand disk->part_tbl
+ * @disk: disk to expand part_tbl for
+ * @partno: expand such that this partno can fit in
+ *
+ * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
+ * uses RCU to allow unlocked dereferencing for stats and other stuff.
+ *
+ * LOCKING:
+ * Matching bd_mutex locked, might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int disk_expand_part_tbl(struct gendisk *disk, int partno)
+{
+	struct disk_part_tbl *old_ptbl = disk->part_tbl;
+	struct disk_part_tbl *new_ptbl;
+	int len = old_ptbl ? old_ptbl->len : 0;
+	int target = partno + 1;
+	size_t size;
+	int i;
+
+	/* disk_max_parts() is zero during initialization, ignore if so */
+	if (disk_max_parts(disk) && target > disk_max_parts(disk))
+		return -EINVAL;
+
+	if (target <= len)
+		return 0;
+
+	size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
+	new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
+	if (!new_ptbl)
+		return -ENOMEM;
+
+	INIT_RCU_HEAD(&new_ptbl->rcu_head);
+	new_ptbl->len = target;
+
+	for (i = 0; i < len; i++)
+		rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
+
+	disk_replace_part_tbl(disk, new_ptbl);
+	return 0;
+}
+
 static void disk_release(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
 	kfree(disk->random);
-	kfree(disk->part);
-	free_disk_stats(disk);
+	disk_replace_part_tbl(disk, NULL);
+	free_part_stats(&disk->part0);
 	kfree(disk);
 }
 struct class block_class = {
@@ -578,83 +950,31 @@
  * The output looks suspiciously like /proc/partitions with a bunch of
  * extra fields.
  */
-
-static void *diskstats_start(struct seq_file *part, loff_t *pos)
-{
-	struct device *dev;
-	loff_t k = *pos;
-
-	mutex_lock(&block_class_lock);
-	dev = class_find_device(&block_class, NULL, &k, find_start);
-	if (dev) {
-		put_device(dev);
-		return dev_to_disk(dev);
-	}
-	return NULL;
-}
-
-static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos)
+static int diskstats_show(struct seq_file *seqf, void *v)
 {
 	struct gendisk *gp = v;
-	struct device *dev;
-
-	++*pos;
-	dev = class_find_device(&block_class, &gp->dev, NULL, find_next);
-	if (dev) {
-		put_device(dev);
-		return dev_to_disk(dev);
-	}
-	return NULL;
-}
-
-static void diskstats_stop(struct seq_file *part, void *v)
-{
-	mutex_unlock(&block_class_lock);
-}
-
-static int diskstats_show(struct seq_file *s, void *v)
-{
-	struct gendisk *gp = v;
+	struct disk_part_iter piter;
+	struct hd_struct *hd;
 	char buf[BDEVNAME_SIZE];
-	int n = 0;
+	int cpu;
 
 	/*
-	if (&gp->dev.kobj.entry == block_class.devices.next)
-		seq_puts(s,	"major minor name"
+	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
+		seq_puts(seqf,	"major minor name"
 				"     rio rmerge rsect ruse wio wmerge "
 				"wsect wuse running use aveq"
 				"\n\n");
 	*/
  
-	preempt_disable();
-	disk_round_stats(gp);
-	preempt_enable();
-	seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
-		gp->major, n + gp->first_minor, disk_name(gp, n, buf),
-		disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]),
-		(unsigned long long)disk_stat_read(gp, sectors[0]),
-		jiffies_to_msecs(disk_stat_read(gp, ticks[0])),
-		disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]),
-		(unsigned long long)disk_stat_read(gp, sectors[1]),
-		jiffies_to_msecs(disk_stat_read(gp, ticks[1])),
-		gp->in_flight,
-		jiffies_to_msecs(disk_stat_read(gp, io_ticks)),
-		jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
-
-	/* now show all non-0 size partitions of it */
-	for (n = 0; n < gp->minors - 1; n++) {
-		struct hd_struct *hd = gp->part[n];
-
-		if (!hd || !hd->nr_sects)
-			continue;
-
-		preempt_disable();
-		part_round_stats(hd);
-		preempt_enable();
-		seq_printf(s, "%4d %4d %s %lu %lu %llu "
+	disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0);
+	while ((hd = disk_part_iter_next(&piter))) {
+		cpu = part_stat_lock();
+		part_round_stats(cpu, hd);
+		part_stat_unlock();
+		seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
 			   "%u %lu %lu %llu %u %u %u %u\n",
-			   gp->major, n + gp->first_minor + 1,
-			   disk_name(gp, n + 1, buf),
+			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
+			   disk_name(gp, hd->partno, buf),
 			   part_stat_read(hd, ios[0]),
 			   part_stat_read(hd, merges[0]),
 			   (unsigned long long)part_stat_read(hd, sectors[0]),
@@ -668,14 +988,15 @@
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
 			);
 	}
+	disk_part_iter_exit(&piter);
  
 	return 0;
 }
 
 const struct seq_operations diskstats_op = {
-	.start	= diskstats_start,
-	.next	= diskstats_next,
-	.stop	= diskstats_stop,
+	.start	= disk_seqf_start,
+	.next	= disk_seqf_next,
+	.stop	= disk_seqf_stop,
 	.show	= diskstats_show
 };
 #endif /* CONFIG_PROC_FS */
@@ -690,7 +1011,7 @@
 	 * set enviroment vars to indicate which event this is for
 	 * so that user space will know to go check the media status.
 	 */
-	kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp);
+	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
 	put_device(gd->driverfs_dev);
 }
 
@@ -703,42 +1024,29 @@
 EXPORT_SYMBOL_GPL(genhd_media_change_notify);
 #endif  /*  0  */
 
-struct find_block {
-	const char *name;
-	int part;
-};
-
-static int match_id(struct device *dev, void *data)
+dev_t blk_lookup_devt(const char *name, int partno)
 {
-	struct find_block *find = data;
-
-	if (dev->type != &disk_type)
-		return 0;
-	if (strcmp(dev->bus_id, find->name) == 0) {
-		struct gendisk *disk = dev_to_disk(dev);
-		if (find->part < disk->minors)
-			return 1;
-	}
-	return 0;
-}
-
-dev_t blk_lookup_devt(const char *name, int part)
-{
-	struct device *dev;
 	dev_t devt = MKDEV(0, 0);
-	struct find_block find;
+	struct class_dev_iter iter;
+	struct device *dev;
 
-	mutex_lock(&block_class_lock);
-	find.name = name;
-	find.part = part;
-	dev = class_find_device(&block_class, NULL, &find, match_id);
-	if (dev) {
-		put_device(dev);
-		devt = MKDEV(MAJOR(dev->devt),
-			     MINOR(dev->devt) + part);
+	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+	while ((dev = class_dev_iter_next(&iter))) {
+		struct gendisk *disk = dev_to_disk(dev);
+		struct hd_struct *part;
+
+		if (strcmp(dev->bus_id, name))
+			continue;
+
+		part = disk_get_part(disk, partno);
+		if (part) {
+			devt = part_devt(part);
+			disk_put_part(part);
+			break;
+		}
+		disk_put_part(part);
 	}
-	mutex_unlock(&block_class_lock);
-
+	class_dev_iter_exit(&iter);
 	return devt;
 }
 EXPORT_SYMBOL(blk_lookup_devt);
@@ -747,6 +1055,7 @@
 {
 	return alloc_disk_node(minors, -1);
 }
+EXPORT_SYMBOL(alloc_disk);
 
 struct gendisk *alloc_disk_node(int minors, int node_id)
 {
@@ -755,32 +1064,28 @@
 	disk = kmalloc_node(sizeof(struct gendisk),
 				GFP_KERNEL | __GFP_ZERO, node_id);
 	if (disk) {
-		if (!init_disk_stats(disk)) {
+		if (!init_part_stats(&disk->part0)) {
 			kfree(disk);
 			return NULL;
 		}
-		if (minors > 1) {
-			int size = (minors - 1) * sizeof(struct hd_struct *);
-			disk->part = kmalloc_node(size,
-				GFP_KERNEL | __GFP_ZERO, node_id);
-			if (!disk->part) {
-				free_disk_stats(disk);
-				kfree(disk);
-				return NULL;
-			}
+		if (disk_expand_part_tbl(disk, 0)) {
+			free_part_stats(&disk->part0);
+			kfree(disk);
+			return NULL;
 		}
+		disk->part_tbl->part[0] = &disk->part0;
+
 		disk->minors = minors;
 		rand_initialize_disk(disk);
-		disk->dev.class = &block_class;
-		disk->dev.type = &disk_type;
-		device_initialize(&disk->dev);
+		disk_to_dev(disk)->class = &block_class;
+		disk_to_dev(disk)->type = &disk_type;
+		device_initialize(disk_to_dev(disk));
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
+		disk->node_id = node_id;
 	}
 	return disk;
 }
-
-EXPORT_SYMBOL(alloc_disk);
 EXPORT_SYMBOL(alloc_disk_node);
 
 struct kobject *get_disk(struct gendisk *disk)
@@ -793,7 +1098,7 @@
 	owner = disk->fops->owner;
 	if (owner && !try_module_get(owner))
 		return NULL;
-	kobj = kobject_get(&disk->dev.kobj);
+	kobj = kobject_get(&disk_to_dev(disk)->kobj);
 	if (kobj == NULL) {
 		module_put(owner);
 		return NULL;
@@ -807,27 +1112,28 @@
 void put_disk(struct gendisk *disk)
 {
 	if (disk)
-		kobject_put(&disk->dev.kobj);
+		kobject_put(&disk_to_dev(disk)->kobj);
 }
 
 EXPORT_SYMBOL(put_disk);
 
 void set_device_ro(struct block_device *bdev, int flag)
 {
-	if (bdev->bd_contains != bdev)
-		bdev->bd_part->policy = flag;
-	else
-		bdev->bd_disk->policy = flag;
+	bdev->bd_part->policy = flag;
 }
 
 EXPORT_SYMBOL(set_device_ro);
 
 void set_disk_ro(struct gendisk *disk, int flag)
 {
-	int i;
-	disk->policy = flag;
-	for (i = 0; i < disk->minors - 1; i++)
-		if (disk->part[i]) disk->part[i]->policy = flag;
+	struct disk_part_iter piter;
+	struct hd_struct *part;
+
+	disk_part_iter_init(&piter, disk,
+			    DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
+	while ((part = disk_part_iter_next(&piter)))
+		part->policy = flag;
+	disk_part_iter_exit(&piter);
 }
 
 EXPORT_SYMBOL(set_disk_ro);
@@ -836,18 +1142,15 @@
 {
 	if (!bdev)
 		return 0;
-	else if (bdev->bd_contains != bdev)
-		return bdev->bd_part->policy;
-	else
-		return bdev->bd_disk->policy;
+	return bdev->bd_part->policy;
 }
 
 EXPORT_SYMBOL(bdev_read_only);
 
-int invalidate_partition(struct gendisk *disk, int index)
+int invalidate_partition(struct gendisk *disk, int partno)
 {
 	int res = 0;
-	struct block_device *bdev = bdget_disk(disk, index);
+	struct block_device *bdev = bdget_disk(disk, partno);
 	if (bdev) {
 		fsync_bdev(bdev);
 		res = __invalidate_device(bdev);

diff --git a/block/ioctl.c b/block/ioctl.c
index 77185e5..38bee32 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c

@@ -12,11 +12,12 @@
 {
 	struct block_device *bdevp;
 	struct gendisk *disk;
+	struct hd_struct *part;
 	struct blkpg_ioctl_arg a;
 	struct blkpg_partition p;
+	struct disk_part_iter piter;
 	long long start, length;
-	int part;
-	int i;
+	int partno;
 	int err;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -28,8 +29,8 @@
 	disk = bdev->bd_disk;
 	if (bdev != bdev->bd_contains)
 		return -EINVAL;
-	part = p.pno;
-	if (part <= 0 || part >= disk->minors)
+	partno = p.pno;
+	if (partno <= 0)
 		return -EINVAL;
 	switch (a.op) {
 		case BLKPG_ADD_PARTITION:
@@ -43,36 +44,37 @@
 				    || pstart < 0 || plength < 0)
 					return -EINVAL;
 			}
-			/* partition number in use? */
-			mutex_lock(&bdev->bd_mutex);
-			if (disk->part[part - 1]) {
-				mutex_unlock(&bdev->bd_mutex);
-				return -EBUSY;
-			}
-			/* overlap? */
-			for (i = 0; i < disk->minors - 1; i++) {
-				struct hd_struct *s = disk->part[i];
 
-				if (!s)
-					continue;
-				if (!(start+length <= s->start_sect ||
-				      start >= s->start_sect + s->nr_sects)) {
+			mutex_lock(&bdev->bd_mutex);
+
+			/* overlap? */
+			disk_part_iter_init(&piter, disk,
+					    DISK_PITER_INCL_EMPTY);
+			while ((part = disk_part_iter_next(&piter))) {
+				if (!(start + length <= part->start_sect ||
+				      start >= part->start_sect + part->nr_sects)) {
+					disk_part_iter_exit(&piter);
 					mutex_unlock(&bdev->bd_mutex);
 					return -EBUSY;
 				}
 			}
+			disk_part_iter_exit(&piter);
+
 			/* all seems OK */
-			err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
+			err = add_partition(disk, partno, start, length,
+					    ADDPART_FLAG_NONE);
 			mutex_unlock(&bdev->bd_mutex);
 			return err;
 		case BLKPG_DEL_PARTITION:
-			if (!disk->part[part-1])
+			part = disk_get_part(disk, partno);
+			if (!part)
 				return -ENXIO;
-			if (disk->part[part - 1]->nr_sects == 0)
-				return -ENXIO;
-			bdevp = bdget_disk(disk, part);
+
+			bdevp = bdget(part_devt(part));
+			disk_put_part(part);
 			if (!bdevp)
 				return -ENOMEM;
+
 			mutex_lock(&bdevp->bd_mutex);
 			if (bdevp->bd_openers) {
 				mutex_unlock(&bdevp->bd_mutex);
@@ -84,7 +86,7 @@
 			invalidate_bdev(bdevp);
 
 			mutex_lock_nested(&bdev->bd_mutex, 1);
-			delete_partition(disk, part);
+			delete_partition(disk, partno);
 			mutex_unlock(&bdev->bd_mutex);
 			mutex_unlock(&bdevp->bd_mutex);
 			bdput(bdevp);
@@ -100,7 +102,7 @@
 	struct gendisk *disk = bdev->bd_disk;
 	int res;
 
-	if (disk->minors == 1 || bdev != bdev->bd_contains)
+	if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -111,6 +113,69 @@
 	return res;
 }
 
+static void blk_ioc_discard_endio(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+	complete(bio->bi_private);
+}
+
+static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
+			     uint64_t len)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	int ret = 0;
+
+	if (start & 511)
+		return -EINVAL;
+	if (len & 511)
+		return -EINVAL;
+	start >>= 9;
+	len >>= 9;
+
+	if (start + len > (bdev->bd_inode->i_size >> 9))
+		return -EINVAL;
+
+	if (!q->prepare_discard_fn)
+		return -EOPNOTSUPP;
+
+	while (len && !ret) {
+		DECLARE_COMPLETION_ONSTACK(wait);
+		struct bio *bio;
+
+		bio = bio_alloc(GFP_KERNEL, 0);
+		if (!bio)
+			return -ENOMEM;
+
+		bio->bi_end_io = blk_ioc_discard_endio;
+		bio->bi_bdev = bdev;
+		bio->bi_private = &wait;
+		bio->bi_sector = start;
+
+		if (len > q->max_hw_sectors) {
+			bio->bi_size = q->max_hw_sectors << 9;
+			len -= q->max_hw_sectors;
+			start += q->max_hw_sectors;
+		} else {
+			bio->bi_size = len << 9;
+			len = 0;
+		}
+		submit_bio(DISCARD_NOBARRIER, bio);
+
+		wait_for_completion(&wait);
+
+		if (bio_flagged(bio, BIO_EOPNOTSUPP))
+			ret = -EOPNOTSUPP;
+		else if (!bio_flagged(bio, BIO_UPTODATE))
+			ret = -EIO;
+		bio_put(bio);
+	}
+	return ret;
+}
+
 static int put_ushort(unsigned long arg, unsigned short val)
 {
 	return put_user(val, (unsigned short __user *)arg);
@@ -258,6 +323,19 @@
 		set_device_ro(bdev, n);
 		unlock_kernel();
 		return 0;
+
+	case BLKDISCARD: {
+		uint64_t range[2];
+
+		if (!(file->f_mode & FMODE_WRITE))
+			return -EBADF;
+
+		if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+			return -EFAULT;
+
+		return blk_ioctl_discard(bdev, range[0], range[1]);
+	}
+
 	case HDIO_GETGEO: {
 		struct hd_geometry geo;
 

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ec4b7f2..c34272a 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c

@@ -185,6 +185,7 @@
 	__set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
 	__set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
 	__set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+	__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
 }
 EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
 
@@ -313,11 +314,12 @@
 			goto out;
 		}
 
-		ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count,
-					  hdr->dxfer_len);
+		ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count,
+					  hdr->dxfer_len, GFP_KERNEL);
 		kfree(iov);
 	} else if (hdr->dxfer_len)
-		ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len);
+		ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
+				      GFP_KERNEL);
 
 	if (ret)
 		goto out;

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 0841095..8dd3336 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c

@@ -165,8 +165,11 @@
 				"firmware_node");
 		ret = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
 				"physical_node");
-		if (acpi_dev->wakeup.flags.valid)
+		if (acpi_dev->wakeup.flags.valid) {
 			device_set_wakeup_capable(dev, true);
+			device_set_wakeup_enable(dev,
+						acpi_dev->wakeup.state.enabled);
+		}
 	}
 
 	return 0;

diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c
index 4ebbba2..bf5b04d 100644
--- a/drivers/acpi/sleep/proc.c
+++ b/drivers/acpi/sleep/proc.c

@@ -377,6 +377,14 @@
 	return 0;
 }
 
+static void physical_device_enable_wakeup(struct acpi_device *adev)
+{
+	struct device *dev = acpi_get_physical_device(adev->handle);
+
+	if (dev && device_can_wakeup(dev))
+		device_set_wakeup_enable(dev, adev->wakeup.state.enabled);
+}
+
 static ssize_t
 acpi_system_write_wakeup_device(struct file *file,
 				const char __user * buffer,
@@ -411,6 +419,7 @@
 		}
 	}
 	if (found_dev) {
+		physical_device_enable_wakeup(found_dev);
 		list_for_each_safe(node, next, &acpi_wakeup_device_list) {
 			struct acpi_device *dev = container_of(node,
 							       struct
@@ -428,6 +437,7 @@
 				       dev->pnp.bus_id, found_dev->pnp.bus_id);
 				dev->wakeup.state.enabled =
 				    found_dev->wakeup.state.enabled;
+				physical_device_enable_wakeup(dev);
 			}
 		}
 	}

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 11c8c19..f17cd4b 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig

@@ -663,7 +663,7 @@
 
 config PATA_PLATFORM
 	tristate "Generic platform device PATA support"
-	depends on EMBEDDED || ARCH_RPC || PPC || HAVE_PATA_PLATFORM
+	depends on EMBEDDED || PPC || HAVE_PATA_PLATFORM
 	help
 	  This option enables support for generic directly connected ATA
 	  devices commonly found on embedded systems.

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 2e1a7cb..aeadd00 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c

@@ -267,8 +267,8 @@
 					 	 * per PM slot */
 };
 
-static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int ahci_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int ahci_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static unsigned int ahci_qc_issue(struct ata_queued_cmd *qc);
 static bool ahci_qc_fill_rtf(struct ata_queued_cmd *qc);
@@ -316,6 +316,7 @@
 
 static struct device_attribute *ahci_sdev_attrs[] = {
 	&dev_attr_sw_activity,
+	&dev_attr_unload_heads,
 	NULL
 };
 
@@ -820,10 +821,10 @@
 	return 0;
 }
 
-static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
-	void __iomem *port_mmio = ahci_port_base(ap);
-	int offset = ahci_scr_offset(ap, sc_reg);
+	void __iomem *port_mmio = ahci_port_base(link->ap);
+	int offset = ahci_scr_offset(link->ap, sc_reg);
 
 	if (offset) {
 		*val = readl(port_mmio + offset);
@@ -832,10 +833,10 @@
 	return -EINVAL;
 }
 
-static int ahci_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int ahci_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
-	void __iomem *port_mmio = ahci_port_base(ap);
-	int offset = ahci_scr_offset(ap, sc_reg);
+	void __iomem *port_mmio = ahci_port_base(link->ap);
+	int offset = ahci_scr_offset(link->ap, sc_reg);
 
 	if (offset) {
 		writel(val, port_mmio + offset);
@@ -973,7 +974,7 @@
 	writel(PORT_IRQ_PHYRDY, port_mmio + PORT_IRQ_STAT);
 
 	/* go ahead and clean out PhyRdy Change from Serror too */
-	ahci_scr_write(ap, SCR_ERROR, ((1 << 16) | (1 << 18)));
+	ahci_scr_write(&ap->link, SCR_ERROR, ((1 << 16) | (1 << 18)));
 
 	/*
  	 * Clear flag to indicate that we should ignore all PhyRdy
@@ -1937,8 +1938,8 @@
 	ata_ehi_push_desc(host_ehi, "irq_stat 0x%08x", irq_stat);
 
 	/* AHCI needs SError cleared; otherwise, it might lock up */
-	ahci_scr_read(ap, SCR_ERROR, &serror);
-	ahci_scr_write(ap, SCR_ERROR, serror);
+	ahci_scr_read(&ap->link, SCR_ERROR, &serror);
+	ahci_scr_write(&ap->link, SCR_ERROR, serror);
 	host_ehi->serror |= serror;
 
 	/* some controllers set IRQ_IF_ERR on device errors, ignore it */
@@ -2027,7 +2028,7 @@
 	if ((hpriv->flags & AHCI_HFLAG_NO_HOTPLUG) &&
 		(status & PORT_IRQ_PHYRDY)) {
 		status &= ~PORT_IRQ_PHYRDY;
-		ahci_scr_write(ap, SCR_ERROR, ((1 << 16) | (1 << 18)));
+		ahci_scr_write(&ap->link, SCR_ERROR, ((1 << 16) | (1 << 18)));
 	}
 
 	if (unlikely(status & PORT_IRQ_ERROR)) {

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index e6b4606..e9e32ed 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c

@@ -165,8 +165,10 @@
 static void ich_set_dmamode(struct ata_port *ap, struct ata_device *adev);
 static int ich_pata_cable_detect(struct ata_port *ap);
 static u8 piix_vmw_bmdma_status(struct ata_port *ap);
-static int piix_sidpr_scr_read(struct ata_port *ap, unsigned int reg, u32 *val);
-static int piix_sidpr_scr_write(struct ata_port *ap, unsigned int reg, u32 val);
+static int piix_sidpr_scr_read(struct ata_link *link,
+			       unsigned int reg, u32 *val);
+static int piix_sidpr_scr_write(struct ata_link *link,
+				unsigned int reg, u32 val);
 #ifdef CONFIG_PM
 static int piix_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg);
 static int piix_pci_device_resume(struct pci_dev *pdev);
@@ -278,12 +280,15 @@
 	/* SATA Controller IDE (PCH) */
 	{ 0x8086, 0x3b20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
 	/* SATA Controller IDE (PCH) */
+	{ 0x8086, 0x3b21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
+	/* SATA Controller IDE (PCH) */
 	{ 0x8086, 0x3b26, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
 	/* SATA Controller IDE (PCH) */
+	{ 0x8086, 0x3b28, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
+	/* SATA Controller IDE (PCH) */
 	{ 0x8086, 0x3b2d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
 	/* SATA Controller IDE (PCH) */
 	{ 0x8086, 0x3b2e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata },
-
 	{ }	/* terminate list */
 };
 
@@ -582,6 +587,7 @@
 	{ 0x27DF, 0x1025, 0x0110 },	/* ICH7 on Acer 3682WLMi */
 	{ 0x27DF, 0x1043, 0x1267 },	/* ICH7 on Asus W5F */
 	{ 0x27DF, 0x103C, 0x30A1 },	/* ICH7 on HP Compaq nc2400 */
+	{ 0x27DF, 0x1071, 0xD221 },	/* ICH7 on Hercules EC-900 */
 	{ 0x24CA, 0x1025, 0x0061 },	/* ICH4 on ACER Aspire 2023WLMi */
 	{ 0x24CA, 0x1025, 0x003d },	/* ICH4 on ACER TM290 */
 	{ 0x266F, 0x1025, 0x0066 },	/* ICH6 on ACER Aspire 1694WLMi */
@@ -885,23 +891,9 @@
  * Serial ATA Index/Data Pair Superset Registers access
  *
  * Beginning from ICH8, there's a sane way to access SCRs using index
- * and data register pair located at BAR5.  This creates an
- * interesting problem of mapping two SCRs to one port.
- *
- * Although they have separate SCRs, the master and slave aren't
- * independent enough to be treated as separate links - e.g. softreset
- * resets both.  Also, there's no protocol defined for hard resetting
- * singled device sharing the virtual port (no defined way to acquire
- * device signature).  This is worked around by merging the SCR values
- * into one sensible value and requesting follow-up SRST after
- * hardreset.
- *
- * SCR merging is perfomed in nibbles which is the unit contents in
- * SCRs are organized.  If two values are equal, the value is used.
- * When they differ, merge table which lists precedence of possible
- * values is consulted and the first match or the last entry when
- * nothing matches is used.  When there's no merge table for the
- * specific nibble, value from the first port is used.
+ * and data register pair located at BAR5 which means that we have
+ * separate SCRs for master and slave.  This is handled using libata
+ * slave_link facility.
  */
 static const int piix_sidx_map[] = {
 	[SCR_STATUS]	= 0,
@@ -909,120 +901,38 @@
 	[SCR_CONTROL]	= 1,
 };
 
-static void piix_sidpr_sel(struct ata_device *dev, unsigned int reg)
+static void piix_sidpr_sel(struct ata_link *link, unsigned int reg)
 {
-	struct ata_port *ap = dev->link->ap;
+	struct ata_port *ap = link->ap;
 	struct piix_host_priv *hpriv = ap->host->private_data;
 
-	iowrite32(((ap->port_no * 2 + dev->devno) << 8) | piix_sidx_map[reg],
+	iowrite32(((ap->port_no * 2 + link->pmp) << 8) | piix_sidx_map[reg],
 		  hpriv->sidpr + PIIX_SIDPR_IDX);
 }
 
-static int piix_sidpr_read(struct ata_device *dev, unsigned int reg)
+static int piix_sidpr_scr_read(struct ata_link *link,
+			       unsigned int reg, u32 *val)
 {
-	struct piix_host_priv *hpriv = dev->link->ap->host->private_data;
-
-	piix_sidpr_sel(dev, reg);
-	return ioread32(hpriv->sidpr + PIIX_SIDPR_DATA);
-}
-
-static void piix_sidpr_write(struct ata_device *dev, unsigned int reg, u32 val)
-{
-	struct piix_host_priv *hpriv = dev->link->ap->host->private_data;
-
-	piix_sidpr_sel(dev, reg);
-	iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA);
-}
-
-static u32 piix_merge_scr(u32 val0, u32 val1, const int * const *merge_tbl)
-{
-	u32 val = 0;
-	int i, mi;
-
-	for (i = 0, mi = 0; i < 32 / 4; i++) {
-		u8 c0 = (val0 >> (i * 4)) & 0xf;
-		u8 c1 = (val1 >> (i * 4)) & 0xf;
-		u8 merged = c0;
-		const int *cur;
-
-		/* if no merge preference, assume the first value */
-		cur = merge_tbl[mi];
-		if (!cur)
-			goto done;
-		mi++;
-
-		/* if two values equal, use it */
-		if (c0 == c1)
-			goto done;
-
-		/* choose the first match or the last from the merge table */
-		while (*cur != -1) {
-			if (c0 == *cur || c1 == *cur)
-				break;
-			cur++;
-		}
-		if (*cur == -1)
-			cur--;
-		merged = *cur;
-	done:
-		val |= merged << (i * 4);
-	}
-
-	return val;
-}
-
-static int piix_sidpr_scr_read(struct ata_port *ap, unsigned int reg, u32 *val)
-{
-	const int * const sstatus_merge_tbl[] = {
-		/* DET */ (const int []){ 1, 3, 0, 4, 3, -1 },
-		/* SPD */ (const int []){ 2, 1, 0, -1 },
-		/* IPM */ (const int []){ 6, 2, 1, 0, -1 },
-		NULL,
-	};
-	const int * const scontrol_merge_tbl[] = {
-		/* DET */ (const int []){ 1, 0, 4, 0, -1 },
-		/* SPD */ (const int []){ 0, 2, 1, 0, -1 },
-		/* IPM */ (const int []){ 0, 1, 2, 3, 0, -1 },
-		NULL,
-	};
-	u32 v0, v1;
+	struct piix_host_priv *hpriv = link->ap->host->private_data;
 
 	if (reg >= ARRAY_SIZE(piix_sidx_map))
 		return -EINVAL;
 
-	if (!(ap->flags & ATA_FLAG_SLAVE_POSS)) {
-		*val = piix_sidpr_read(&ap->link.device[0], reg);
-		return 0;
-	}
-
-	v0 = piix_sidpr_read(&ap->link.device[0], reg);
-	v1 = piix_sidpr_read(&ap->link.device[1], reg);
-
-	switch (reg) {
-	case SCR_STATUS:
-		*val = piix_merge_scr(v0, v1, sstatus_merge_tbl);
-		break;
-	case SCR_ERROR:
-		*val = v0 | v1;
-		break;
-	case SCR_CONTROL:
-		*val = piix_merge_scr(v0, v1, scontrol_merge_tbl);
-		break;
-	}
-
+	piix_sidpr_sel(link, reg);
+	*val = ioread32(hpriv->sidpr + PIIX_SIDPR_DATA);
 	return 0;
 }
 
-static int piix_sidpr_scr_write(struct ata_port *ap, unsigned int reg, u32 val)
+static int piix_sidpr_scr_write(struct ata_link *link,
+				unsigned int reg, u32 val)
 {
+	struct piix_host_priv *hpriv = link->ap->host->private_data;
+
 	if (reg >= ARRAY_SIZE(piix_sidx_map))
 		return -EINVAL;
 
-	piix_sidpr_write(&ap->link.device[0], reg, val);
-
-	if (ap->flags & ATA_FLAG_SLAVE_POSS)
-		piix_sidpr_write(&ap->link.device[1], reg, val);
-
+	piix_sidpr_sel(link, reg);
+	iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA);
 	return 0;
 }
 
@@ -1363,28 +1273,28 @@
 	return map;
 }
 
-static void __devinit piix_init_sidpr(struct ata_host *host)
+static int __devinit piix_init_sidpr(struct ata_host *host)
 {
 	struct pci_dev *pdev = to_pci_dev(host->dev);
 	struct piix_host_priv *hpriv = host->private_data;
-	struct ata_device *dev0 = &host->ports[0]->link.device[0];
+	struct ata_link *link0 = &host->ports[0]->link;
 	u32 scontrol;
-	int i;
+	int i, rc;
 
 	/* check for availability */
 	for (i = 0; i < 4; i++)
 		if (hpriv->map[i] == IDE)
-			return;
+			return 0;
 
 	if (!(host->ports[0]->flags & PIIX_FLAG_SIDPR))
-		return;
+		return 0;
 
 	if (pci_resource_start(pdev, PIIX_SIDPR_BAR) == 0 ||
 	    pci_resource_len(pdev, PIIX_SIDPR_BAR) != PIIX_SIDPR_LEN)
-		return;
+		return 0;
 
 	if (pcim_iomap_regions(pdev, 1 << PIIX_SIDPR_BAR, DRV_NAME))
-		return;
+		return 0;
 
 	hpriv->sidpr = pcim_iomap_table(pdev)[PIIX_SIDPR_BAR];
 
@@ -1392,7 +1302,7 @@
 	 * Give it a test drive by inhibiting power save modes which
 	 * we'll do anyway.
 	 */
-	scontrol = piix_sidpr_read(dev0, SCR_CONTROL);
+	piix_sidpr_scr_read(link0, SCR_CONTROL, &scontrol);
 
 	/* if IPM is already 3, SCR access is probably working.  Don't
 	 * un-inhibit power save modes as BIOS might have inhibited
@@ -1400,18 +1310,30 @@
 	 */
 	if ((scontrol & 0xf00) != 0x300) {
 		scontrol |= 0x300;
-		piix_sidpr_write(dev0, SCR_CONTROL, scontrol);
-		scontrol = piix_sidpr_read(dev0, SCR_CONTROL);
+		piix_sidpr_scr_write(link0, SCR_CONTROL, scontrol);
+		piix_sidpr_scr_read(link0, SCR_CONTROL, &scontrol);
 
 		if ((scontrol & 0xf00) != 0x300) {
 			dev_printk(KERN_INFO, host->dev, "SCR access via "
 				   "SIDPR is available but doesn't work\n");
-			return;
+			return 0;
 		}
 	}
 
-	host->ports[0]->ops = &piix_sidpr_sata_ops;
-	host->ports[1]->ops = &piix_sidpr_sata_ops;
+	/* okay, SCRs available, set ops and ask libata for slave_link */
+	for (i = 0; i < 2; i++) {
+		struct ata_port *ap = host->ports[i];
+
+		ap->ops = &piix_sidpr_sata_ops;
+
+		if (ap->flags & ATA_FLAG_SLAVE_POSS) {
+			rc = ata_slave_link_init(ap);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
 }
 
 static void piix_iocfg_bit18_quirk(struct pci_dev *pdev)
@@ -1521,7 +1443,9 @@
 	/* initialize controller */
 	if (port_flags & ATA_FLAG_SATA) {
 		piix_init_pcs(host, piix_map_db_table[ent->driver_data]);
-		piix_init_sidpr(host);
+		rc = piix_init_sidpr(host);
+		if (rc)
+			return rc;
 	}
 
 	/* apply IOCFG bit18 quirk */

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 79e3a8e..1ee9499 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c

@@ -163,6 +163,67 @@
 MODULE_VERSION(DRV_VERSION);
 
 
+/*
+ * Iterator helpers.  Don't use directly.
+ *
+ * LOCKING:
+ * Host lock or EH context.
+ */
+struct ata_link *__ata_port_next_link(struct ata_port *ap,
+				      struct ata_link *link, bool dev_only)
+{
+	/* NULL link indicates start of iteration */
+	if (!link) {
+		if (dev_only && sata_pmp_attached(ap))
+			return ap->pmp_link;
+		return &ap->link;
+	}
+
+	/* we just iterated over the host master link, what's next? */
+	if (link == &ap->link) {
+		if (!sata_pmp_attached(ap)) {
+			if (unlikely(ap->slave_link) && !dev_only)
+				return ap->slave_link;
+			return NULL;
+		}
+		return ap->pmp_link;
+	}
+
+	/* slave_link excludes PMP */
+	if (unlikely(link == ap->slave_link))
+		return NULL;
+
+	/* iterate to the next PMP link */
+	if (++link < ap->pmp_link + ap->nr_pmp_links)
+		return link;
+	return NULL;
+}
+
+/**
+ *	ata_dev_phys_link - find physical link for a device
+ *	@dev: ATA device to look up physical link for
+ *
+ *	Look up physical link which @dev is attached to.  Note that
+ *	this is different from @dev->link only when @dev is on slave
+ *	link.  For all other cases, it's the same as @dev->link.
+ *
+ *	LOCKING:
+ *	Don't care.
+ *
+ *	RETURNS:
+ *	Pointer to the found physical link.
+ */
+struct ata_link *ata_dev_phys_link(struct ata_device *dev)
+{
+	struct ata_port *ap = dev->link->ap;
+
+	if (!ap->slave_link)
+		return dev->link;
+	if (!dev->devno)
+		return &ap->link;
+	return ap->slave_link;
+}
+
 /**
  *	ata_force_cbl - force cable type according to libata.force
  *	@ap: ATA port of interest
@@ -206,7 +267,8 @@
  *	the host link and all fan-out ports connected via PMP.  If the
  *	device part is specified as 0 (e.g. 1.00:), it specifies the
  *	first fan-out link not the host link.  Device number 15 always
- *	points to the host link whether PMP is attached or not.
+ *	points to the host link whether PMP is attached or not.  If the
+ *	controller has slave link, device number 16 points to it.
  *
  *	LOCKING:
  *	EH context.
@@ -214,12 +276,11 @@
 static void ata_force_link_limits(struct ata_link *link)
 {
 	bool did_spd = false;
-	int linkno, i;
+	int linkno = link->pmp;
+	int i;
 
 	if (ata_is_host_link(link))
-		linkno = 15;
-	else
-		linkno = link->pmp;
+		linkno += 15;
 
 	for (i = ata_force_tbl_size - 1; i >= 0; i--) {
 		const struct ata_force_ent *fe = &ata_force_tbl[i];
@@ -266,9 +327,9 @@
 	int alt_devno = devno;
 	int i;
 
-	/* allow n.15 for the first device attached to host port */
-	if (ata_is_host_link(dev->link) && devno == 0)
-		alt_devno = 15;
+	/* allow n.15/16 for devices attached to host port */
+	if (ata_is_host_link(dev->link))
+		alt_devno += 15;
 
 	for (i = ata_force_tbl_size - 1; i >= 0; i--) {
 		const struct ata_force_ent *fe = &ata_force_tbl[i];
@@ -320,9 +381,9 @@
 	int alt_devno = devno;
 	int i;
 
-	/* allow n.15 for the first device attached to host port */
-	if (ata_is_host_link(dev->link) && devno == 0)
-		alt_devno = 15;
+	/* allow n.15/16 for devices attached to host port */
+	if (ata_is_host_link(dev->link))
+		alt_devno += 15;
 
 	for (i = 0; i < ata_force_tbl_size; i++) {
 		const struct ata_force_ent *fe = &ata_force_tbl[i];
@@ -2681,7 +2742,7 @@
 		return;
 	sata_scr_read(link, SCR_CONTROL, &scontrol);
 
-	if (ata_link_online(link)) {
+	if (ata_phys_link_online(link)) {
 		tmp = (sstatus >> 4) & 0xf;
 		ata_link_printk(link, KERN_INFO,
 				"SATA link up %s (SStatus %X SControl %X)\n",
@@ -3372,6 +3433,12 @@
 	unsigned long nodev_deadline = ata_deadline(start, ATA_TMOUT_FF_WAIT);
 	int warned = 0;
 
+	/* Slave readiness can't be tested separately from master.  On
+	 * M/S emulation configuration, this function should be called
+	 * only on the master and it will handle both master and slave.
+	 */
+	WARN_ON(link == link->ap->slave_link);
+
 	if (time_after(nodev_deadline, deadline))
 		nodev_deadline = deadline;
 
@@ -3593,7 +3660,7 @@
 	}
 
 	/* no point in trying softreset on offline link */
-	if (ata_link_offline(link))
+	if (ata_phys_link_offline(link))
 		ehc->i.action &= ~ATA_EH_SOFTRESET;
 
 	return 0;
@@ -3671,7 +3738,7 @@
 	if (rc)
 		goto out;
 	/* if link is offline nothing more to do */
-	if (ata_link_offline(link))
+	if (ata_phys_link_offline(link))
 		goto out;
 
 	/* Link is online.  From this point, -ENODEV too is an error. */
@@ -4868,10 +4935,8 @@
 int sata_scr_read(struct ata_link *link, int reg, u32 *val)
 {
 	if (ata_is_host_link(link)) {
-		struct ata_port *ap = link->ap;
-
 		if (sata_scr_valid(link))
-			return ap->ops->scr_read(ap, reg, val);
+			return link->ap->ops->scr_read(link, reg, val);
 		return -EOPNOTSUPP;
 	}
 
@@ -4897,10 +4962,8 @@
 int sata_scr_write(struct ata_link *link, int reg, u32 val)
 {
 	if (ata_is_host_link(link)) {
-		struct ata_port *ap = link->ap;
-
 		if (sata_scr_valid(link))
-			return ap->ops->scr_write(ap, reg, val);
+			return link->ap->ops->scr_write(link, reg, val);
 		return -EOPNOTSUPP;
 	}
 
@@ -4925,13 +4988,12 @@
 int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
 {
 	if (ata_is_host_link(link)) {
-		struct ata_port *ap = link->ap;
 		int rc;
 
 		if (sata_scr_valid(link)) {
-			rc = ap->ops->scr_write(ap, reg, val);
+			rc = link->ap->ops->scr_write(link, reg, val);
 			if (rc == 0)
-				rc = ap->ops->scr_read(ap, reg, &val);
+				rc = link->ap->ops->scr_read(link, reg, &val);
 			return rc;
 		}
 		return -EOPNOTSUPP;
@@ -4941,7 +5003,7 @@
 }
 
 /**
- *	ata_link_online - test whether the given link is online
+ *	ata_phys_link_online - test whether the given link is online
  *	@link: ATA link to test
  *
  *	Test whether @link is online.  Note that this function returns
@@ -4952,20 +5014,20 @@
  *	None.
  *
  *	RETURNS:
- *	1 if the port online status is available and online.
+ *	True if the port online status is available and online.
  */
-int ata_link_online(struct ata_link *link)
+bool ata_phys_link_online(struct ata_link *link)
 {
 	u32 sstatus;
 
 	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0 &&
 	    (sstatus & 0xf) == 0x3)
-		return 1;
-	return 0;
+		return true;
+	return false;
 }
 
 /**
- *	ata_link_offline - test whether the given link is offline
+ *	ata_phys_link_offline - test whether the given link is offline
  *	@link: ATA link to test
  *
  *	Test whether @link is offline.  Note that this function
@@ -4976,16 +5038,68 @@
  *	None.
  *
  *	RETURNS:
- *	1 if the port offline status is available and offline.
+ *	True if the port offline status is available and offline.
  */
-int ata_link_offline(struct ata_link *link)
+bool ata_phys_link_offline(struct ata_link *link)
 {
 	u32 sstatus;
 
 	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0 &&
 	    (sstatus & 0xf) != 0x3)
-		return 1;
-	return 0;
+		return true;
+	return false;
+}
+
+/**
+ *	ata_link_online - test whether the given link is online
+ *	@link: ATA link to test
+ *
+ *	Test whether @link is online.  This is identical to
+ *	ata_phys_link_online() when there's no slave link.  When
+ *	there's a slave link, this function should only be called on
+ *	the master link and will return true if any of M/S links is
+ *	online.
+ *
+ *	LOCKING:
+ *	None.
+ *
+ *	RETURNS:
+ *	True if the port online status is available and online.
+ */
+bool ata_link_online(struct ata_link *link)
+{
+	struct ata_link *slave = link->ap->slave_link;
+
+	WARN_ON(link == slave);	/* shouldn't be called on slave link */
+
+	return ata_phys_link_online(link) ||
+		(slave && ata_phys_link_online(slave));
+}
+
+/**
+ *	ata_link_offline - test whether the given link is offline
+ *	@link: ATA link to test
+ *
+ *	Test whether @link is offline.  This is identical to
+ *	ata_phys_link_offline() when there's no slave link.  When
+ *	there's a slave link, this function should only be called on
+ *	the master link and will return true if both M/S links are
+ *	offline.
+ *
+ *	LOCKING:
+ *	None.
+ *
+ *	RETURNS:
+ *	True if the port offline status is available and offline.
+ */
+bool ata_link_offline(struct ata_link *link)
+{
+	struct ata_link *slave = link->ap->slave_link;
+
+	WARN_ON(link == slave);	/* shouldn't be called on slave link */
+
+	return ata_phys_link_offline(link) &&
+		(!slave || ata_phys_link_offline(slave));
 }
 
 #ifdef CONFIG_PM
@@ -5127,11 +5241,11 @@
  */
 void ata_dev_init(struct ata_device *dev)
 {
-	struct ata_link *link = dev->link;
+	struct ata_link *link = ata_dev_phys_link(dev);
 	struct ata_port *ap = link->ap;
 	unsigned long flags;
 
-	/* SATA spd limit is bound to the first device */
+	/* SATA spd limit is bound to the attached device, reset together */
 	link->sata_spd_limit = link->hw_sata_spd_limit;
 	link->sata_spd = 0;
 
@@ -5264,6 +5378,7 @@
 	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
 	INIT_LIST_HEAD(&ap->eh_done_q);
 	init_waitqueue_head(&ap->eh_wait_q);
+	init_completion(&ap->park_req_pending);
 	init_timer_deferrable(&ap->fastdrain_timer);
 	ap->fastdrain_timer.function = ata_eh_fastdrain_timerfn;
 	ap->fastdrain_timer.data = (unsigned long)ap;
@@ -5294,6 +5409,7 @@
 			scsi_host_put(ap->scsi_host);
 
 		kfree(ap->pmp_link);
+		kfree(ap->slave_link);
 		kfree(ap);
 		host->ports[i] = NULL;
 	}
@@ -5414,6 +5530,68 @@
 	return host;
 }
 
+/**
+ *	ata_slave_link_init - initialize slave link
+ *	@ap: port to initialize slave link for
+ *
+ *	Create and initialize slave link for @ap.  This enables slave
+ *	link handling on the port.
+ *
+ *	In libata, a port contains links and a link contains devices.
+ *	There is single host link but if a PMP is attached to it,
+ *	there can be multiple fan-out links.  On SATA, there's usually
+ *	a single device connected to a link but PATA and SATA
+ *	controllers emulating TF based interface can have two - master
+ *	and slave.
+ *
+ *	However, there are a few controllers which don't fit into this
+ *	abstraction too well - SATA controllers which emulate TF
+ *	interface with both master and slave devices but also have
+ *	separate SCR register sets for each device.  These controllers
+ *	need separate links for physical link handling
+ *	(e.g. onlineness, link speed) but should be treated like a
+ *	traditional M/S controller for everything else (e.g. command
+ *	issue, softreset).
+ *
+ *	slave_link is libata's way of handling this class of
+ *	controllers without impacting core layer too much.  For
+ *	anything other than physical link handling, the default host
+ *	link is used for both master and slave.  For physical link
+ *	handling, separate @ap->slave_link is used.  All dirty details
+ *	are implemented inside libata core layer.  From LLD's POV, the
+ *	only difference is that prereset, hardreset and postreset are
+ *	called once more for the slave link, so the reset sequence
+ *	looks like the following.
+ *
+ *	prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) ->
+ *	softreset(M) -> postreset(M) -> postreset(S)
+ *
+ *	Note that softreset is called only for the master.  Softreset
+ *	resets both M/S by definition, so SRST on master should handle
+ *	both (the standard method will work just fine).
+ *
+ *	LOCKING:
+ *	Should be called before host is registered.
+ *
+ *	RETURNS:
+ *	0 on success, -errno on failure.
+ */
+int ata_slave_link_init(struct ata_port *ap)
+{
+	struct ata_link *link;
+
+	WARN_ON(ap->slave_link);
+	WARN_ON(ap->flags & ATA_FLAG_PMP);
+
+	link = kzalloc(sizeof(*link), GFP_KERNEL);
+	if (!link)
+		return -ENOMEM;
+
+	ata_link_init(ap, link, 1);
+	ap->slave_link = link;
+	return 0;
+}
+
 static void ata_host_stop(struct device *gendev, void *res)
 {
 	struct ata_host *host = dev_get_drvdata(gendev);
@@ -5640,6 +5818,8 @@
 
 		/* init sata_spd_limit to the current value */
 		sata_link_init_spd(&ap->link);
+		if (ap->slave_link)
+			sata_link_init_spd(ap->slave_link);
 
 		/* print per-port info to dmesg */
 		xfer_mask = ata_pack_xfermask(ap->pio_mask, ap->mwdma_mask,
@@ -6260,10 +6440,12 @@
 EXPORT_SYMBOL_GPL(sata_port_ops);
 EXPORT_SYMBOL_GPL(ata_dummy_port_ops);
 EXPORT_SYMBOL_GPL(ata_dummy_port_info);
+EXPORT_SYMBOL_GPL(__ata_port_next_link);
 EXPORT_SYMBOL_GPL(ata_std_bios_param);
 EXPORT_SYMBOL_GPL(ata_host_init);
 EXPORT_SYMBOL_GPL(ata_host_alloc);
 EXPORT_SYMBOL_GPL(ata_host_alloc_pinfo);
+EXPORT_SYMBOL_GPL(ata_slave_link_init);
 EXPORT_SYMBOL_GPL(ata_host_start);
 EXPORT_SYMBOL_GPL(ata_host_register);
 EXPORT_SYMBOL_GPL(ata_host_activate);

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index c1db2f2..a93247c 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c

@@ -33,6 +33,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/blkdev.h>
 #include <linux/pci.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
@@ -79,6 +80,8 @@
 	 */
 	ATA_EH_PRERESET_TIMEOUT		= 10000,
 	ATA_EH_FASTDRAIN_INTERVAL	=  3000,
+
+	ATA_EH_UA_TRIES			= 5,
 };
 
 /* The following table determines how we sequence resets.  Each entry
@@ -457,29 +460,29 @@
  *	RETURNS:
  *	EH_HANDLED or EH_NOT_HANDLED
  */
-enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
+enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct ata_port *ap = ata_shost_to_port(host);
 	unsigned long flags;
 	struct ata_queued_cmd *qc;
-	enum scsi_eh_timer_return ret;
+	enum blk_eh_timer_return ret;
 
 	DPRINTK("ENTER\n");
 
 	if (ap->ops->error_handler) {
-		ret = EH_NOT_HANDLED;
+		ret = BLK_EH_NOT_HANDLED;
 		goto out;
 	}
 
-	ret = EH_HANDLED;
+	ret = BLK_EH_HANDLED;
 	spin_lock_irqsave(ap->lock, flags);
 	qc = ata_qc_from_tag(ap, ap->link.active_tag);
 	if (qc) {
 		WARN_ON(qc->scsicmd != cmd);
 		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
 		qc->err_mask |= AC_ERR_TIMEOUT;
-		ret = EH_NOT_HANDLED;
+		ret = BLK_EH_NOT_HANDLED;
 	}
 	spin_unlock_irqrestore(ap->lock, flags);
 
@@ -831,7 +834,7 @@
 	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
 	 * this function completes.
 	 */
-	scsi_req_abort_cmd(qc->scsicmd);
+	blk_abort_request(qc->scsicmd->request);
 }
 
 /**
@@ -1357,6 +1360,37 @@
 }
 
 /**
+ *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
+ *	@dev: target ATAPI device
+ *	@r_sense_key: out parameter for sense_key
+ *
+ *	Perform ATAPI TEST_UNIT_READY.
+ *
+ *	LOCKING:
+ *	EH context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, AC_ERR_* mask on failure.
+ */
+static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
+{
+	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
+	struct ata_taskfile tf;
+	unsigned int err_mask;
+
+	ata_tf_init(dev, &tf);
+
+	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf.command = ATA_CMD_PACKET;
+	tf.protocol = ATAPI_PROT_NODATA;
+
+	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
+	if (err_mask == AC_ERR_DEV)
+		*r_sense_key = tf.feature >> 4;
+	return err_mask;
+}
+
+/**
  *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
  *	@dev: device to perform REQUEST_SENSE to
  *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
@@ -1756,7 +1790,7 @@
 static unsigned int ata_eh_speed_down(struct ata_device *dev,
 				unsigned int eflags, unsigned int err_mask)
 {
-	struct ata_link *link = dev->link;
+	struct ata_link *link = ata_dev_phys_link(dev);
 	int xfer_ok = 0;
 	unsigned int verdict;
 	unsigned int action = 0;
@@ -1880,7 +1914,8 @@
 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
 
-		if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link)
+		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
+		    ata_dev_phys_link(qc->dev) != link)
 			continue;
 
 		/* inherit upper level err_mask */
@@ -1967,6 +2002,23 @@
 	ata_port_for_each_link(link, ap)
 		ata_eh_link_autopsy(link);
 
+	/* Handle the frigging slave link.  Autopsy is done similarly
+	 * but actions and flags are transferred over to the master
+	 * link and handled from there.
+	 */
+	if (ap->slave_link) {
+		struct ata_eh_context *mehc = &ap->link.eh_context;
+		struct ata_eh_context *sehc = &ap->slave_link->eh_context;
+
+		ata_eh_link_autopsy(ap->slave_link);
+
+		ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
+		mehc->i.action		|= sehc->i.action;
+		mehc->i.dev_action[1]	|= sehc->i.dev_action[1];
+		mehc->i.flags		|= sehc->i.flags;
+		ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
+	}
+
 	/* Autopsy of fanout ports can affect host link autopsy.
 	 * Perform host link autopsy last.
 	 */
@@ -2001,7 +2053,8 @@
 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
 
-		if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link ||
+		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
+		    ata_dev_phys_link(qc->dev) != link ||
 		    ((qc->flags & ATA_QCFLAG_QUIET) &&
 		     qc->err_mask == AC_ERR_DEV))
 			continue;
@@ -2068,7 +2121,7 @@
 		char cdb_buf[70] = "";
 
 		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
-		    qc->dev->link != link || !qc->err_mask)
+		    ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
 			continue;
 
 		if (qc->dma_dir != DMA_NONE) {
@@ -2160,12 +2213,14 @@
 }
 
 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
-			unsigned int *classes, unsigned long deadline)
+			unsigned int *classes, unsigned long deadline,
+			bool clear_classes)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link)
-		classes[dev->devno] = ATA_DEV_UNKNOWN;
+	if (clear_classes)
+		ata_link_for_each_dev(dev, link)
+			classes[dev->devno] = ATA_DEV_UNKNOWN;
 
 	return reset(link, classes, deadline);
 }
@@ -2187,17 +2242,20 @@
 		 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
 {
 	struct ata_port *ap = link->ap;
+	struct ata_link *slave = ap->slave_link;
 	struct ata_eh_context *ehc = &link->eh_context;
+	struct ata_eh_context *sehc = &slave->eh_context;
 	unsigned int *classes = ehc->classes;
 	unsigned int lflags = link->flags;
 	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
 	int max_tries = 0, try = 0;
+	struct ata_link *failed_link;
 	struct ata_device *dev;
 	unsigned long deadline, now;
 	ata_reset_fn_t reset;
 	unsigned long flags;
 	u32 sstatus;
-	int nr_known, rc;
+	int nr_unknown, rc;
 
 	/*
 	 * Prepare to reset
@@ -2252,8 +2310,30 @@
 	}
 
 	if (prereset) {
-		rc = prereset(link,
-			      ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT));
+		unsigned long deadline = ata_deadline(jiffies,
+						      ATA_EH_PRERESET_TIMEOUT);
+
+		if (slave) {
+			sehc->i.action &= ~ATA_EH_RESET;
+			sehc->i.action |= ehc->i.action;
+		}
+
+		rc = prereset(link, deadline);
+
+		/* If present, do prereset on slave link too.  Reset
+		 * is skipped iff both master and slave links report
+		 * -ENOENT or clear ATA_EH_RESET.
+		 */
+		if (slave && (rc == 0 || rc == -ENOENT)) {
+			int tmp;
+
+			tmp = prereset(slave, deadline);
+			if (tmp != -ENOENT)
+				rc = tmp;
+
+			ehc->i.action |= sehc->i.action;
+		}
+
 		if (rc) {
 			if (rc == -ENOENT) {
 				ata_link_printk(link, KERN_DEBUG,
@@ -2302,25 +2382,51 @@
 		else
 			ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
 
-		rc = ata_do_reset(link, reset, classes, deadline);
-		if (rc && rc != -EAGAIN)
+		rc = ata_do_reset(link, reset, classes, deadline, true);
+		if (rc && rc != -EAGAIN) {
+			failed_link = link;
 			goto fail;
+		}
 
+		/* hardreset slave link if existent */
+		if (slave && reset == hardreset) {
+			int tmp;
+
+			if (verbose)
+				ata_link_printk(slave, KERN_INFO,
+						"hard resetting link\n");
+
+			ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
+			tmp = ata_do_reset(slave, reset, classes, deadline,
+					   false);
+			switch (tmp) {
+			case -EAGAIN:
+				rc = -EAGAIN;
+			case 0:
+				break;
+			default:
+				failed_link = slave;
+				rc = tmp;
+				goto fail;
+			}
+		}
+
+		/* perform follow-up SRST if necessary */
 		if (reset == hardreset &&
 		    ata_eh_followup_srst_needed(link, rc, classes)) {
-			/* okay, let's do follow-up softreset */
 			reset = softreset;
 
 			if (!reset) {
 				ata_link_printk(link, KERN_ERR,
 						"follow-up softreset required "
 						"but no softreset avaliable\n");
+				failed_link = link;
 				rc = -EINVAL;
 				goto fail;
 			}
 
 			ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
-			rc = ata_do_reset(link, reset, classes, deadline);
+			rc = ata_do_reset(link, reset, classes, deadline, true);
 		}
 	} else {
 		if (verbose)
@@ -2341,7 +2447,7 @@
 		dev->pio_mode = XFER_PIO_0;
 		dev->flags &= ~ATA_DFLAG_SLEEPING;
 
-		if (ata_link_offline(link))
+		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
 			continue;
 
 		/* apply class override */
@@ -2354,6 +2460,8 @@
 	/* record current link speed */
 	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
 		link->sata_spd = (sstatus >> 4) & 0xf;
+	if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
+		slave->sata_spd = (sstatus >> 4) & 0xf;
 
 	/* thaw the port */
 	if (ata_is_host_link(link))
@@ -2366,12 +2474,17 @@
 	 * reset and here.  This race is mediated by cross checking
 	 * link onlineness and classification result later.
 	 */
-	if (postreset)
+	if (postreset) {
 		postreset(link, classes);
+		if (slave)
+			postreset(slave, classes);
+	}
 
 	/* clear cached SError */
 	spin_lock_irqsave(link->ap->lock, flags);
 	link->eh_info.serror = 0;
+	if (slave)
+		slave->eh_info.serror = 0;
 	spin_unlock_irqrestore(link->ap->lock, flags);
 
 	/* Make sure onlineness and classification result correspond.
@@ -2381,19 +2494,21 @@
 	 * link onlineness and classification result, those conditions
 	 * can be reliably detected and retried.
 	 */
-	nr_known = 0;
+	nr_unknown = 0;
 	ata_link_for_each_dev(dev, link) {
 		/* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
-		if (classes[dev->devno] == ATA_DEV_UNKNOWN)
+		if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
 			classes[dev->devno] = ATA_DEV_NONE;
-		else
-			nr_known++;
+			if (ata_phys_link_online(ata_dev_phys_link(dev)))
+				nr_unknown++;
+		}
 	}
 
-	if (classify && !nr_known && ata_link_online(link)) {
+	if (classify && nr_unknown) {
 		if (try < max_tries) {
 			ata_link_printk(link, KERN_WARNING, "link online but "
 				       "device misclassified, retrying\n");
+			failed_link = link;
 			rc = -EAGAIN;
 			goto fail;
 		}
@@ -2404,6 +2519,8 @@
 
 	/* reset successful, schedule revalidation */
 	ata_eh_done(link, NULL, ATA_EH_RESET);
+	if (slave)
+		ata_eh_done(slave, NULL, ATA_EH_RESET);
 	ehc->last_reset = jiffies;
 	ehc->i.action |= ATA_EH_REVALIDATE;
 
@@ -2411,6 +2528,8 @@
  out:
 	/* clear hotplug flag */
 	ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
+	if (slave)
+		sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
 
 	spin_lock_irqsave(ap->lock, flags);
 	ap->pflags &= ~ATA_PFLAG_RESETTING;
@@ -2431,7 +2550,7 @@
 	if (time_before(now, deadline)) {
 		unsigned long delta = deadline - now;
 
-		ata_link_printk(link, KERN_WARNING,
+		ata_link_printk(failed_link, KERN_WARNING,
 			"reset failed (errno=%d), retrying in %u secs\n",
 			rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
 
@@ -2439,13 +2558,92 @@
 			delta = schedule_timeout_uninterruptible(delta);
 	}
 
-	if (rc == -EPIPE || try == max_tries - 1)
+	if (try == max_tries - 1) {
 		sata_down_spd_limit(link);
+		if (slave)
+			sata_down_spd_limit(slave);
+	} else if (rc == -EPIPE)
+		sata_down_spd_limit(failed_link);
+
 	if (hardreset)
 		reset = hardreset;
 	goto retry;
 }
 
+static inline void ata_eh_pull_park_action(struct ata_port *ap)
+{
+	struct ata_link *link;
+	struct ata_device *dev;
+	unsigned long flags;
+
+	/*
+	 * This function can be thought of as an extended version of
+	 * ata_eh_about_to_do() specially crafted to accommodate the
+	 * requirements of ATA_EH_PARK handling. Since the EH thread
+	 * does not leave the do {} while () loop in ata_eh_recover as
+	 * long as the timeout for a park request to *one* device on
+	 * the port has not expired, and since we still want to pick
+	 * up park requests to other devices on the same port or
+	 * timeout updates for the same device, we have to pull
+	 * ATA_EH_PARK actions from eh_info into eh_context.i
+	 * ourselves at the beginning of each pass over the loop.
+	 *
+	 * Additionally, all write accesses to &ap->park_req_pending
+	 * through INIT_COMPLETION() (see below) or complete()
+	 * (see ata_scsi_park_store()) are protected by the host lock.
+	 * As a result we have that park_req_pending.done is zero on
+	 * exit from this function, i.e. when ATA_EH_PARK actions for
+	 * *all* devices on port ap have been pulled into the
+	 * respective eh_context structs. If, and only if,
+	 * park_req_pending.done is non-zero by the time we reach
+	 * wait_for_completion_timeout(), another ATA_EH_PARK action
+	 * has been scheduled for at least one of the devices on port
+	 * ap and we have to cycle over the do {} while () loop in
+	 * ata_eh_recover() again.
+	 */
+
+	spin_lock_irqsave(ap->lock, flags);
+	INIT_COMPLETION(ap->park_req_pending);
+	ata_port_for_each_link(link, ap) {
+		ata_link_for_each_dev(dev, link) {
+			struct ata_eh_info *ehi = &link->eh_info;
+
+			link->eh_context.i.dev_action[dev->devno] |=
+				ehi->dev_action[dev->devno] & ATA_EH_PARK;
+			ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
+		}
+	}
+	spin_unlock_irqrestore(ap->lock, flags);
+}
+
+static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
+{
+	struct ata_eh_context *ehc = &dev->link->eh_context;
+	struct ata_taskfile tf;
+	unsigned int err_mask;
+
+	ata_tf_init(dev, &tf);
+	if (park) {	/* nonzero: issue head unload */
+		ehc->unloaded_mask |= 1 << dev->devno;
+		tf.command = ATA_CMD_IDLEIMMEDIATE;
+		tf.feature = 0x44;	/* UNLOAD, per ATA8-ACS (verify) */
+		tf.lbal = 0x4c;		/* ASCII 'L' */
+		tf.lbam = 0x4e;		/* ASCII 'N' */
+		tf.lbah = 0x55;		/* ASCII 'U' */
+	} else {	/* unpark path */
+		ehc->unloaded_mask &= ~(1 << dev->devno);
+		tf.command = ATA_CMD_CHK_POWER;
+	}
+
+	tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
+	tf.protocol |= ATA_PROT_NODATA;
+	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+	if (park && (err_mask || tf.lbal != 0xc4)) {	/* 0xc4: unload ok */
+		ata_dev_printk(dev, KERN_ERR, "head unload failed!\n");
+		ehc->unloaded_mask &= ~(1 << dev->devno);	/* not parked */
+	}
+}
+
 static int ata_eh_revalidate_and_attach(struct ata_link *link,
 					struct ata_device **r_failed_dev)
 {
@@ -2472,7 +2670,7 @@
 		if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
 			WARN_ON(dev->class == ATA_DEV_PMP);
 
-			if (ata_link_offline(link)) {
+			if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
 				rc = -EIO;
 				goto err;
 			}
@@ -2610,6 +2808,53 @@
 	return rc;
 }
 
+/**
+ *	atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
+ *	@dev: ATAPI device to clear UA for
+ *
+ *	Resets and other operations can make an ATAPI device raise
+ *	UNIT ATTENTION which causes the next operation to fail.  Clear
+ *	it with up to ATA_EH_UA_TRIES TEST UNIT READY/REQUEST SENSE pairs.
+ *
+ *	LOCKING:
+ *	EH context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success or if UA still persists after all tries, -EIO otherwise.
+ */
+static int atapi_eh_clear_ua(struct ata_device *dev)
+{
+	int i;
+
+	for (i = 0; i < ATA_EH_UA_TRIES; i++) {
+		u8 sense_buffer[SCSI_SENSE_BUFFERSIZE];
+		u8 sense_key = 0;
+		unsigned int err_mask;
+
+		err_mask = atapi_eh_tur(dev, &sense_key);
+		if (err_mask != 0 && err_mask != AC_ERR_DEV) {
+			ata_dev_printk(dev, KERN_WARNING, "TEST_UNIT_READY "
+				"failed (err_mask=0x%x)\n", err_mask);
+			return -EIO;
+		}
+
+		if (!err_mask || sense_key != UNIT_ATTENTION)
+			return 0;	/* no UA left to clear */
+
+		err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
+		if (err_mask) {
+			ata_dev_printk(dev, KERN_WARNING, "failed to clear "
+				"UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
+			return -EIO;
+		}
+	}
+
+	ata_dev_printk(dev, KERN_WARNING,
+		"UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES);
+
+	return 0;	/* persistent UA is only warned about, not an error */
+}
+
 static int ata_link_nr_enabled(struct ata_link *link)
 {
 	struct ata_device *dev;
@@ -2697,7 +2942,7 @@
 			/* This is the last chance, better to slow
 			 * down than lose it.
 			 */
-			sata_down_spd_limit(dev->link);
+			sata_down_spd_limit(ata_dev_phys_link(dev));
 			ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
 		}
 	}
@@ -2707,7 +2952,7 @@
 		ata_dev_disable(dev);
 
 		/* detach if offline */
-		if (ata_link_offline(dev->link))
+		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
 			ata_eh_detach_dev(dev);
 
 		/* schedule probe if necessary */
@@ -2755,7 +3000,7 @@
 	struct ata_device *dev;
 	int nr_failed_devs;
 	int rc;
-	unsigned long flags;
+	unsigned long flags, deadline;
 
 	DPRINTK("ENTER\n");
 
@@ -2829,6 +3074,56 @@
 		}
 	}
 
+	do {
+		unsigned long now;
+
+		/*
+		 * clears ATA_EH_PARK in eh_info and resets
+		 * ap->park_req_pending
+		 */
+		ata_eh_pull_park_action(ap);
+
+		deadline = jiffies;
+		ata_port_for_each_link(link, ap) {
+			ata_link_for_each_dev(dev, link) {
+				struct ata_eh_context *ehc = &link->eh_context;
+				unsigned long tmp;
+
+				if (dev->class != ATA_DEV_ATA)
+					continue;
+				if (!(ehc->i.dev_action[dev->devno] &
+				      ATA_EH_PARK))
+					continue;
+				tmp = dev->unpark_deadline;
+				if (time_before(deadline, tmp))
+					deadline = tmp;
+				else if (time_before_eq(tmp, jiffies))
+					continue;
+				if (ehc->unloaded_mask & (1 << dev->devno))
+					continue;
+
+				ata_eh_park_issue_cmd(dev, 1);
+			}
+		}
+
+		now = jiffies;
+		if (time_before_eq(deadline, now))
+			break;
+
+		deadline = wait_for_completion_timeout(&ap->park_req_pending,
+						       deadline - now);
+	} while (deadline);
+	ata_port_for_each_link(link, ap) {
+		ata_link_for_each_dev(dev, link) {
+			if (!(link->eh_context.unloaded_mask &
+			      (1 << dev->devno)))
+				continue;
+
+			ata_eh_park_issue_cmd(dev, 0);
+			ata_eh_done(link, dev, ATA_EH_PARK);
+		}
+	}
+
 	/* the rest */
 	ata_port_for_each_link(link, ap) {
 		struct ata_eh_context *ehc = &link->eh_context;
@@ -2852,6 +3147,20 @@
 			ehc->i.flags &= ~ATA_EHI_SETMODE;
 		}
 
+		/* If reset has been issued, clear UA to avoid
+		 * disrupting the current users of the device.
+		 */
+		if (ehc->i.flags & ATA_EHI_DID_RESET) {
+			ata_link_for_each_dev(dev, link) {
+				if (dev->class != ATA_DEV_ATAPI)
+					continue;
+				rc = atapi_eh_clear_ua(dev);
+				if (rc)
+					goto dev_fail;
+			}
+		}
+
+		/* configure link power saving */
 		if (ehc->i.action & ATA_EH_LPM)
 			ata_link_for_each_dev(dev, link)
 				ata_dev_enable_pm(dev, ap->pm_policy);

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index b9d3ba4..59fe051 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c

@@ -183,6 +183,125 @@
 		ata_scsi_lpm_show, ata_scsi_lpm_put);
 EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy);
 
+/**
+ *	ata_scsi_park_show - report remaining head-unload time via sysfs
+ *	@device: device whose scsi_device (to_scsi_device()) is queried
+ *	@attr: attribute being read (not used)
+ *	@buf: buffer receiving the decimal millisecond count
+ *
+ *	Shows how many milliseconds are left until dev->unpark_deadline
+ *	while EH is in progress and the device is marked in the link's
+ *	unloaded_mask; shows 0 in every other case.
+ *
+ *	LOCKING:
+ *	Acquires ap->lock with spin_lock_irqsave() and releases it with
+ *	the matching spin_unlock_irqrestore().
+ *
+ *	RETURNS:
+ *	Number of characters written to @buf on success, -ENODEV if no
+ *	ATA device is found for the SCSI device, -EOPNOTSUPP if the
+ *	device has ATA_DFLAG_NO_UNLOAD set.
+ */
+static ssize_t ata_scsi_park_show(struct device *device,
+				  struct device_attribute *attr, char *buf)
+{
+	struct scsi_device *sdev = to_scsi_device(device);
+	struct ata_port *ap;
+	struct ata_link *link;
+	struct ata_device *dev;
+	unsigned long flags;
+	unsigned int uninitialized_var(msecs);
+	int rc = 0;
+
+	ap = ata_shost_to_port(sdev->host);
+
+	spin_lock_irqsave(ap->lock, flags);
+	dev = ata_scsi_find_dev(ap, sdev);
+	if (!dev) {
+		rc = -ENODEV;
+		goto unlock;
+	}
+	if (dev->flags & ATA_DFLAG_NO_UNLOAD) {
+		rc = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	link = dev->link;
+	if (ap->pflags & ATA_PFLAG_EH_IN_PROGRESS &&
+	    link->eh_context.unloaded_mask & (1 << dev->devno) &&
+	    time_after(dev->unpark_deadline, jiffies))
+		msecs = jiffies_to_msecs(dev->unpark_deadline - jiffies);
+	else
+		msecs = 0;
+
+unlock:
+	/* must restore, not force-enable, IRQs: we took the lock irqsave */
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	return rc ? rc : snprintf(buf, 20, "%u\n", msecs);
+}
+
+static ssize_t ata_scsi_park_store(struct device *device,
+				   struct device_attribute *attr,
+				   const char *buf, size_t len)
+{
+	struct scsi_device *sdev = to_scsi_device(device);
+	struct ata_port *ap;
+	struct ata_device *dev;
+	long int input;
+	unsigned long flags;
+	int rc;
+
+	rc = strict_strtol(buf, 10, &input);
+	if (rc || input < -2)	/* valid: -2, -1 or a timeout >= 0 */
+		return -EINVAL;
+	if (input > ATA_TMOUT_MAX_PARK) {
+		rc = -EOVERFLOW;	/* still park, but with the clamped timeout */
+		input = ATA_TMOUT_MAX_PARK;
+	}
+
+	ap = ata_shost_to_port(sdev->host);
+
+	spin_lock_irqsave(ap->lock, flags);
+	dev = ata_scsi_find_dev(ap, sdev);
+	if (unlikely(!dev)) {
+		rc = -ENODEV;
+		goto unlock;
+	}
+	if (dev->class != ATA_DEV_ATA) {
+		rc = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	if (input >= 0) {
+		if (dev->flags & ATA_DFLAG_NO_UNLOAD) {
+			rc = -EOPNOTSUPP;
+			goto unlock;
+		}
+
+		dev->unpark_deadline = ata_deadline(jiffies, input);
+		dev->link->eh_info.dev_action[dev->devno] |= ATA_EH_PARK;
+		ata_port_schedule_eh(ap);
+		complete(&ap->park_req_pending);	/* EH may be waiting on this */
+	} else {
+		switch (input) {
+		case -1:	/* allow head unloading */
+			dev->flags &= ~ATA_DFLAG_NO_UNLOAD;
+			break;
+		case -2:	/* forbid head unloading */
+			dev->flags |= ATA_DFLAG_NO_UNLOAD;
+			break;
+		}
+	}
+unlock:
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	return rc ? rc : len;
+}
+DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR,
+	    ata_scsi_park_show, ata_scsi_park_store);
+EXPORT_SYMBOL_GPL(dev_attr_unload_heads);
+
 static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq)
 {
 	cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
@@ -269,6 +368,12 @@
 			ata_scsi_activity_store);
 EXPORT_SYMBOL_GPL(dev_attr_sw_activity);
 
+struct device_attribute *ata_common_sdev_attrs[] = {
+	&dev_attr_unload_heads,
+	NULL
+};
+EXPORT_SYMBOL_GPL(ata_common_sdev_attrs);
+
 static void ata_scsi_invalid_field(struct scsi_cmnd *cmd,
 				   void (*done)(struct scsi_cmnd *))
 {
@@ -954,6 +1059,9 @@
 static int ata_scsi_dev_config(struct scsi_device *sdev,
 			       struct ata_device *dev)
 {
+	if (!ata_id_has_unload(dev->id))
+		dev->flags |= ATA_DFLAG_NO_UNLOAD;
+
 	/* configure max sectors */
 	blk_queue_max_sectors(sdev->request_queue, dev->max_sectors);
 
@@ -977,6 +1085,10 @@
 
 		blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN);
 	} else {
+		if (ata_id_is_ssd(dev->id))
+			queue_flag_set_unlocked(QUEUE_FLAG_NONROT,
+						sdev->request_queue);
+
 		/* ATA devices must be sector aligned */
 		blk_queue_update_dma_alignment(sdev->request_queue,
 					       ATA_SECT_SIZE - 1);

diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index ade5c75..fe2839e 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h

@@ -70,6 +70,7 @@
 extern int libata_fua;
 extern int libata_noacpi;
 extern int libata_allow_tpm;
+extern struct ata_link *ata_dev_phys_link(struct ata_device *dev);
 extern void ata_force_cbl(struct ata_port *ap);
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
@@ -107,6 +108,8 @@
 extern void __ata_qc_complete(struct ata_queued_cmd *qc);
 extern int atapi_check_dma(struct ata_queued_cmd *qc);
 extern void swap_buf_le16(u16 *buf, unsigned int buf_words);
+extern bool ata_phys_link_online(struct ata_link *link);
+extern bool ata_phys_link_offline(struct ata_link *link);
 extern void ata_dev_init(struct ata_device *dev);
 extern void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp);
 extern int sata_link_init_spd(struct ata_link *link);
@@ -152,7 +155,7 @@
 /* libata-eh.c */
 extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
 extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
-extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
+extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
 extern void ata_scsi_error(struct Scsi_Host *host);
 extern void ata_port_wait_eh(struct ata_port *ap);
 extern void ata_eh_fastdrain_timerfn(unsigned long arg);

diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index d393290..1266924 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c

@@ -1632,6 +1632,8 @@
 		return -ENODEV;
 	}
 
+	dev_set_drvdata(&pdev->dev, host);
+
 	return 0;
 }
 
@@ -1648,6 +1650,7 @@
 	struct ata_host *host = dev_get_drvdata(dev);
 
 	ata_host_detach(host);
+	dev_set_drvdata(&pdev->dev, NULL);
 
 	peripheral_free_list(atapi_io_port);
 
@@ -1655,27 +1658,44 @@
 }
 
 #ifdef CONFIG_PM
-int bfin_atapi_suspend(struct platform_device *pdev, pm_message_t state)
+static int bfin_atapi_suspend(struct platform_device *pdev, pm_message_t state)
 {
-	return 0;
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	if (host)
+		return ata_host_suspend(host, state);
+	else
+		return 0;
 }
 
-int bfin_atapi_resume(struct platform_device *pdev)
+static int bfin_atapi_resume(struct platform_device *pdev)
 {
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	int ret;
+
+	if (host) {
+		ret = bfin_reset_controller(host);
+		if (ret) {
+			printk(KERN_ERR DRV_NAME ": Error during HW init\n");
+			return ret;
+		}
+		ata_host_resume(host);
+	}
+
 	return 0;
 }
+#else
+#define bfin_atapi_suspend NULL
+#define bfin_atapi_resume NULL
 #endif
 
 static struct platform_driver bfin_atapi_driver = {
 	.probe			= bfin_atapi_probe,
 	.remove			= __devexit_p(bfin_atapi_remove),
+	.suspend		= bfin_atapi_suspend,
+	.resume			= bfin_atapi_resume,
 	.driver = {
 		.name		= DRV_NAME,
 		.owner		= THIS_MODULE,
-#ifdef CONFIG_PM
-		.suspend	= bfin_atapi_suspend,
-		.resume		= bfin_atapi_resume,
-#endif
 	},
 };
 

diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index e970b22..a598bb3 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c

@@ -230,7 +230,7 @@
 		tmpbyte & 1, tmpbyte & 0x30);
 
 	*try_mmio = 0;
-#ifdef CONFIG_PPC_MERGE
+#ifdef CONFIG_PPC
 	if (machine_is(cell))
 		*try_mmio = (tmpbyte & 1) || pci_resource_start(pdev, 5);
 #endif

diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 3924e72..1a56db9 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c

@@ -469,10 +469,10 @@
 	return true;
 }
 
-static int sata_fsl_scr_write(struct ata_port *ap, unsigned int sc_reg_in,
-			       u32 val)
+static int sata_fsl_scr_write(struct ata_link *link,
+			      unsigned int sc_reg_in, u32 val)
 {
-	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
+	struct sata_fsl_host_priv *host_priv = link->ap->host->private_data;
 	void __iomem *ssr_base = host_priv->ssr_base;
 	unsigned int sc_reg;
 
@@ -493,10 +493,10 @@
 	return 0;
 }
 
-static int sata_fsl_scr_read(struct ata_port *ap, unsigned int sc_reg_in,
-			u32 *val)
+static int sata_fsl_scr_read(struct ata_link *link,
+			     unsigned int sc_reg_in, u32 *val)
 {
-	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
+	struct sata_fsl_host_priv *host_priv = link->ap->host->private_data;
 	void __iomem *ssr_base = host_priv->ssr_base;
 	unsigned int sc_reg;
 
@@ -645,12 +645,12 @@
 	 * Workaround for 8315DS board 3gbps link-up issue,
 	 * currently limit SATA port to GEN1 speed
 	 */
-	sata_fsl_scr_read(ap, SCR_CONTROL, &temp);
+	sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp);
 	temp &= ~(0xF << 4);
 	temp |= (0x1 << 4);
-	sata_fsl_scr_write(ap, SCR_CONTROL, temp);
+	sata_fsl_scr_write(&ap->link, SCR_CONTROL, temp);
 
-	sata_fsl_scr_read(ap, SCR_CONTROL, &temp);
+	sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp);
 	dev_printk(KERN_WARNING, dev, "scr_control, speed limited to %x\n",
 			temp);
 #endif
@@ -868,7 +868,7 @@
 			ioread32(CQ + hcr_base),
 			ioread32(CA + hcr_base), ioread32(CC + hcr_base));
 
-		sata_fsl_scr_read(ap, SCR_ERROR, &Serror);
+		sata_fsl_scr_read(&ap->link, SCR_ERROR, &Serror);
 
 		DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS));
 		DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL));
@@ -972,9 +972,9 @@
 	 * Handle & Clear SError
 	 */
 
-	sata_fsl_scr_read(ap, SCR_ERROR, &SError);
+	sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError);
 	if (unlikely(SError & 0xFFFF0000)) {
-		sata_fsl_scr_write(ap, SCR_ERROR, SError);
+		sata_fsl_scr_write(&ap->link, SCR_ERROR, SError);
 	}
 
 	DPRINTK("error_intr,hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n",
@@ -1091,7 +1091,7 @@
 
 	hstatus = ioread32(hcr_base + HSTATUS);
 
-	sata_fsl_scr_read(ap, SCR_ERROR, &SError);
+	sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError);
 
 	if (unlikely(SError & 0xFFFF0000)) {
 		DPRINTK("serror @host_intr : 0x%x\n", SError);

diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 5032c32..fbbd87c 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c

@@ -269,9 +269,9 @@
 	writeb(0xff, port_base + PORT_IRQ_STAT);
 }
 
-static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val)
+static int inic_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val)
 {
-	void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR;
+	void __iomem *scr_addr = inic_port_base(link->ap) + PORT_SCR;
 	void __iomem *addr;
 
 	if (unlikely(sc_reg >= ARRAY_SIZE(scr_map)))
@@ -286,9 +286,9 @@
 	return 0;
 }
 
-static int inic_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val)
+static int inic_scr_write(struct ata_link *link, unsigned sc_reg, u32 val)
 {
-	void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR;
+	void __iomem *scr_addr = inic_port_base(link->ap) + PORT_SCR;
 
 	if (unlikely(sc_reg >= ARRAY_SIZE(scr_map)))
 		return -EINVAL;

diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index c815f8e..2b24ae5 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c

@@ -493,10 +493,10 @@
 	void (*reset_bus)(struct ata_host *host, void __iomem *mmio);
 };
 
-static int mv_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val);
-static int mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val);
-static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val);
-static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val);
+static int mv_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val);
+static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val);
+static int mv5_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val);
+static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val);
 static int mv_port_start(struct ata_port *ap);
 static void mv_port_stop(struct ata_port *ap);
 static int mv_qc_defer(struct ata_queued_cmd *qc);
@@ -1070,23 +1070,23 @@
 	return ofs;
 }
 
-static int mv_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
+static int mv_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val)
 {
 	unsigned int ofs = mv_scr_offset(sc_reg_in);
 
 	if (ofs != 0xffffffffU) {
-		*val = readl(mv_ap_base(ap) + ofs);
+		*val = readl(mv_ap_base(link->ap) + ofs);
 		return 0;
 	} else
 		return -EINVAL;
 }
 
-static int mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val)
+static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val)
 {
 	unsigned int ofs = mv_scr_offset(sc_reg_in);
 
 	if (ofs != 0xffffffffU) {
-		writelfl(val, mv_ap_base(ap) + ofs);
+		writelfl(val, mv_ap_base(link->ap) + ofs);
 		return 0;
 	} else
 		return -EINVAL;
@@ -2251,11 +2251,11 @@
 	return ofs;
 }
 
-static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val)
+static int mv5_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val)
 {
-	struct mv_host_priv *hpriv = ap->host->private_data;
+	struct mv_host_priv *hpriv = link->ap->host->private_data;
 	void __iomem *mmio = hpriv->base;
-	void __iomem *addr = mv5_phy_base(mmio, ap->port_no);
+	void __iomem *addr = mv5_phy_base(mmio, link->ap->port_no);
 	unsigned int ofs = mv5_scr_offset(sc_reg_in);
 
 	if (ofs != 0xffffffffU) {
@@ -2265,11 +2265,11 @@
 		return -EINVAL;
 }
 
-static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val)
+static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val)
 {
-	struct mv_host_priv *hpriv = ap->host->private_data;
+	struct mv_host_priv *hpriv = link->ap->host->private_data;
 	void __iomem *mmio = hpriv->base;
-	void __iomem *addr = mv5_phy_base(mmio, ap->port_no);
+	void __iomem *addr = mv5_phy_base(mmio, link->ap->port_no);
 	unsigned int ofs = mv5_scr_offset(sc_reg_in);
 
 	if (ofs != 0xffffffffU) {

diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 14601dc..fae3841 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c

@@ -302,8 +302,8 @@
 static irqreturn_t nv_generic_interrupt(int irq, void *dev_instance);
 static irqreturn_t nv_nf2_interrupt(int irq, void *dev_instance);
 static irqreturn_t nv_ck804_interrupt(int irq, void *dev_instance);
-static int nv_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int nv_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 
 static void nv_nf2_freeze(struct ata_port *ap);
 static void nv_nf2_thaw(struct ata_port *ap);
@@ -1511,21 +1511,21 @@
 	return ret;
 }
 
-static int nv_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
 
-	*val = ioread32(ap->ioaddr.scr_addr + (sc_reg * 4));
+	*val = ioread32(link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
-static int nv_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
 
-	iowrite32(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+	iowrite32(val, link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
@@ -2218,9 +2218,9 @@
 	if (!pp->qc_active)
 		return;
 
-	if (ap->ops->scr_read(ap, SCR_ERROR, &serror))
+	if (ap->ops->scr_read(&ap->link, SCR_ERROR, &serror))
 		return;
-	ap->ops->scr_write(ap, SCR_ERROR, serror);
+	ap->ops->scr_write(&ap->link, SCR_ERROR, serror);
 
 	if (ata_stat & ATA_ERR) {
 		ata_ehi_clear_desc(ehi);

diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 030665b..750d8cd 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c

@@ -137,8 +137,8 @@
 	dma_addr_t		pkt_dma;
 };
 
-static int pdc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int pdc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int pdc_sata_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int pdc_sata_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int pdc_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static int pdc_common_port_start(struct ata_port *ap);
 static int pdc_sata_port_start(struct ata_port *ap);
@@ -386,19 +386,21 @@
 	return ATA_CBL_SATA;
 }
 
-static int pdc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int pdc_sata_scr_read(struct ata_link *link,
+			     unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = readl(ap->ioaddr.scr_addr + (sc_reg * 4));
+	*val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
-static int pdc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int pdc_sata_scr_write(struct ata_link *link,
+			      unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	writel(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+	writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
@@ -731,7 +733,7 @@
 	if (sata_scr_valid(&ap->link)) {
 		u32 serror;
 
-		pdc_sata_scr_read(ap, SCR_ERROR, &serror);
+		pdc_sata_scr_read(&ap->link, SCR_ERROR, &serror);
 		ehi->serror |= serror;
 	}
 

diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index 1600107..a000c86 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c

@@ -111,8 +111,8 @@
 	qs_state_t		state;
 };
 
-static int qs_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int qs_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int qs_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int qs_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static int qs_port_start(struct ata_port *ap);
 static void qs_host_stop(struct ata_host *host);
@@ -242,11 +242,11 @@
 	return ata_sff_prereset(link, deadline);
 }
 
-static int qs_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int qs_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = readl(ap->ioaddr.scr_addr + (sc_reg * 8));
+	*val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 8));
 	return 0;
 }
 
@@ -256,11 +256,11 @@
 	ata_std_error_handler(ap);
 }
 
-static int qs_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	writel(val, ap->ioaddr.scr_addr + (sc_reg * 8));
+	writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 8));
 	return 0;
 }
 

diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 88bf421..031d7b7 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c

@@ -115,8 +115,8 @@
 static int sil_pci_device_resume(struct pci_dev *pdev);
 #endif
 static void sil_dev_config(struct ata_device *dev);
-static int sil_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int sil_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int sil_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int sil_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static int sil_set_mode(struct ata_link *link, struct ata_device **r_failed);
 static void sil_freeze(struct ata_port *ap);
 static void sil_thaw(struct ata_port *ap);
@@ -317,9 +317,9 @@
 	return NULL;
 }
 
-static int sil_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int sil_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
-	void __iomem *mmio = sil_scr_addr(ap, sc_reg);
+	void __iomem *mmio = sil_scr_addr(link->ap, sc_reg);
 
 	if (mmio) {
 		*val = readl(mmio);
@@ -328,9 +328,9 @@
 	return -EINVAL;
 }
 
-static int sil_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int sil_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
-	void __iomem *mmio = sil_scr_addr(ap, sc_reg);
+	void __iomem *mmio = sil_scr_addr(link->ap, sc_reg);
 
 	if (mmio) {
 		writel(val, mmio);
@@ -352,8 +352,8 @@
 		 * controllers continue to assert IRQ as long as
 		 * SError bits are pending.  Clear SError immediately.
 		 */
-		sil_scr_read(ap, SCR_ERROR, &serror);
-		sil_scr_write(ap, SCR_ERROR, serror);
+		sil_scr_read(&ap->link, SCR_ERROR, &serror);
+		sil_scr_write(&ap->link, SCR_ERROR, serror);
 
 		/* Sometimes spurious interrupts occur, double check
 		 * it's PHYRDY CHG.

diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 84ffcc2..4621807 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c

@@ -340,8 +340,8 @@
 };
 
 static void sil24_dev_config(struct ata_device *dev);
-static int sil24_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val);
-static int sil24_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val);
+static int sil24_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val);
+static int sil24_scr_write(struct ata_link *link, unsigned sc_reg, u32 val);
 static int sil24_qc_defer(struct ata_queued_cmd *qc);
 static void sil24_qc_prep(struct ata_queued_cmd *qc);
 static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc);
@@ -504,9 +504,9 @@
 	[SCR_ACTIVE]	= 3,
 };
 
-static int sil24_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val)
+static int sil24_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val)
 {
-	void __iomem *scr_addr = sil24_port_base(ap) + PORT_SCONTROL;
+	void __iomem *scr_addr = sil24_port_base(link->ap) + PORT_SCONTROL;
 
 	if (sc_reg < ARRAY_SIZE(sil24_scr_map)) {
 		void __iomem *addr;
@@ -517,9 +517,9 @@
 	return -EINVAL;
 }
 
-static int sil24_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val)
+static int sil24_scr_write(struct ata_link *link, unsigned sc_reg, u32 val)
 {
-	void __iomem *scr_addr = sil24_port_base(ap) + PORT_SCONTROL;
+	void __iomem *scr_addr = sil24_port_base(link->ap) + PORT_SCONTROL;
 
 	if (sc_reg < ARRAY_SIZE(sil24_scr_map)) {
 		void __iomem *addr;

diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index 1010b30..9c43b4e 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c

@@ -64,8 +64,8 @@
 };
 
 static int sis_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-static int sis_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int sis_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int sis_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int sis_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 
 static const struct pci_device_id sis_pci_tbl[] = {
 	{ PCI_VDEVICE(SI, 0x0180), sis_180 },	/* SiS 964/180 */
@@ -134,10 +134,11 @@
 	return addr;
 }
 
-static u32 sis_scr_cfg_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static u32 sis_scr_cfg_read(struct ata_link *link,
+			    unsigned int sc_reg, u32 *val)
 {
-	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-	unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg);
+	struct pci_dev *pdev = to_pci_dev(link->ap->host->dev);
+	unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg);
 	u32 val2 = 0;
 	u8 pmr;
 
@@ -158,10 +159,11 @@
 	return 0;
 }
 
-static int sis_scr_cfg_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int sis_scr_cfg_write(struct ata_link *link,
+			     unsigned int sc_reg, u32 val)
 {
-	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-	unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg);
+	struct pci_dev *pdev = to_pci_dev(link->ap->host->dev);
+	unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg);
 	u8 pmr;
 
 	if (sc_reg == SCR_ERROR) /* doesn't exist in PCI cfg space */
@@ -178,8 +180,9 @@
 	return 0;
 }
 
-static int sis_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int sis_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
+	struct ata_port *ap = link->ap;
 	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
 	u8 pmr;
 
@@ -187,7 +190,7 @@
 		return -EINVAL;
 
 	if (ap->flags & SIS_FLAG_CFGSCR)
-		return sis_scr_cfg_read(ap, sc_reg, val);
+		return sis_scr_cfg_read(link, sc_reg, val);
 
 	pci_read_config_byte(pdev, SIS_PMR, &pmr);
 
@@ -202,8 +205,9 @@
 	return 0;
 }
 
-static int sis_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int sis_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
+	struct ata_port *ap = link->ap;
 	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
 	u8 pmr;
 
@@ -213,7 +217,7 @@
 	pci_read_config_byte(pdev, SIS_PMR, &pmr);
 
 	if (ap->flags & SIS_FLAG_CFGSCR)
-		return sis_scr_cfg_write(ap, sc_reg, val);
+		return sis_scr_cfg_write(link, sc_reg, val);
 	else {
 		iowrite32(val, ap->ioaddr.scr_addr + (sc_reg * 4));
 		if ((pdev->device == 0x0182) || (pdev->device == 0x0183) ||

diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index fb13b82..609d147 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c

@@ -123,20 +123,22 @@
 	}
 }
 
-static int k2_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int k2_sata_scr_read(struct ata_link *link,
+			    unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = readl(ap->ioaddr.scr_addr + (sc_reg * 4));
+	*val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
 
-static int k2_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int k2_sata_scr_write(struct ata_link *link,
+			     unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	writel(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+	writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 

diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index db529b8..019575b 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c

@@ -57,8 +57,8 @@
 };
 
 static int uli_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-static int uli_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int uli_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int uli_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int uli_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 
 static const struct pci_device_id uli_pci_tbl[] = {
 	{ PCI_VDEVICE(AL, 0x5289), uli_5289 },
@@ -107,39 +107,39 @@
 	return hpriv->scr_cfg_addr[ap->port_no] + (4 * sc_reg);
 }
 
-static u32 uli_scr_cfg_read(struct ata_port *ap, unsigned int sc_reg)
+static u32 uli_scr_cfg_read(struct ata_link *link, unsigned int sc_reg)
 {
-	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-	unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg);
+	struct pci_dev *pdev = to_pci_dev(link->ap->host->dev);
+	unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg);
 	u32 val;
 
 	pci_read_config_dword(pdev, cfg_addr, &val);
 	return val;
 }
 
-static void uli_scr_cfg_write(struct ata_port *ap, unsigned int scr, u32 val)
+static void uli_scr_cfg_write(struct ata_link *link, unsigned int scr, u32 val)
 {
-	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
-	unsigned int cfg_addr = get_scr_cfg_addr(ap, scr);
+	struct pci_dev *pdev = to_pci_dev(link->ap->host->dev);
+	unsigned int cfg_addr = get_scr_cfg_addr(link->ap, scr);
 
 	pci_write_config_dword(pdev, cfg_addr, val);
 }
 
-static int uli_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int uli_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
 
-	*val = uli_scr_cfg_read(ap, sc_reg);
+	*val = uli_scr_cfg_read(link, sc_reg);
 	return 0;
 }
 
-static int uli_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int uli_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL) //SCR_CONTROL=2, SCR_ERROR=1, SCR_STATUS=0
 		return -EINVAL;
 
-	uli_scr_cfg_write(ap, sc_reg, val);
+	uli_scr_cfg_write(link, sc_reg, val);
 	return 0;
 }
 

diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 96deeb3..1cfa745 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c

@@ -68,8 +68,8 @@
 };
 
 static int svia_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
-static int svia_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-static int svia_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val);
+static int svia_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
+static int svia_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
 static void svia_noop_freeze(struct ata_port *ap);
 static int vt6420_prereset(struct ata_link *link, unsigned long deadline);
 static int vt6421_pata_cable_detect(struct ata_port *ap);
@@ -152,19 +152,19 @@
 MODULE_DEVICE_TABLE(pci, svia_pci_tbl);
 MODULE_VERSION(DRV_VERSION);
 
-static int svia_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int svia_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = ioread32(ap->ioaddr.scr_addr + (4 * sc_reg));
+	*val = ioread32(link->ap->ioaddr.scr_addr + (4 * sc_reg));
 	return 0;
 }
 
-static int svia_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int svia_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	iowrite32(val, ap->ioaddr.scr_addr + (4 * sc_reg));
+	iowrite32(val, link->ap->ioaddr.scr_addr + (4 * sc_reg));
 	return 0;
 }
 
@@ -210,20 +210,20 @@
 		goto skip_scr;
 
 	/* Resume phy.  This is the old SATA resume sequence */
-	svia_scr_write(ap, SCR_CONTROL, 0x300);
-	svia_scr_read(ap, SCR_CONTROL, &scontrol); /* flush */
+	svia_scr_write(link, SCR_CONTROL, 0x300);
+	svia_scr_read(link, SCR_CONTROL, &scontrol); /* flush */
 
 	/* wait for phy to become ready, if necessary */
 	do {
 		msleep(200);
-		svia_scr_read(ap, SCR_STATUS, &sstatus);
+		svia_scr_read(link, SCR_STATUS, &sstatus);
 		if ((sstatus & 0xf) != 1)
 			break;
 	} while (time_before(jiffies, timeout));
 
 	/* open code sata_print_link_status() */
-	svia_scr_read(ap, SCR_STATUS, &sstatus);
-	svia_scr_read(ap, SCR_CONTROL, &scontrol);
+	svia_scr_read(link, SCR_STATUS, &sstatus);
+	svia_scr_read(link, SCR_CONTROL, &scontrol);
 
 	online = (sstatus & 0xf) == 0x3;
 
@@ -232,7 +232,7 @@
 			online ? "up" : "down", sstatus, scontrol);
 
 	/* SStatus is read one more time */
-	svia_scr_read(ap, SCR_STATUS, &sstatus);
+	svia_scr_read(link, SCR_STATUS, &sstatus);
 
 	if (!online) {
 		/* tell EH to bail */

diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
index f3d635c..c57cdff 100644
--- a/drivers/ata/sata_vsc.c
+++ b/drivers/ata/sata_vsc.c

@@ -98,20 +98,22 @@
 			      VSC_SATA_INT_PHY_CHANGE),
 };
 
-static int vsc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val)
+static int vsc_sata_scr_read(struct ata_link *link,
+			     unsigned int sc_reg, u32 *val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	*val = readl(ap->ioaddr.scr_addr + (sc_reg * 4));
+	*val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 
 
-static int vsc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val)
+static int vsc_sata_scr_write(struct ata_link *link,
+			      unsigned int sc_reg, u32 val)
 {
 	if (sc_reg > SCR_CONTROL)
 		return -EINVAL;
-	writel(val, ap->ioaddr.scr_addr + (sc_reg * 4));
+	writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4));
 	return 0;
 }
 

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 31dc0cd..0a5f055 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h

@@ -54,7 +54,7 @@
  */
 struct class_private {
 	struct kset class_subsys;
-	struct list_head class_devices;
+	struct klist class_devices;
 	struct list_head class_interfaces;
 	struct kset class_dirs;
 	struct mutex class_mutex;

diff --git a/drivers/base/class.c b/drivers/base/class.c
index cc5e28c..eb85e43 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c

@@ -135,6 +135,20 @@
 	}
 }
 
+static void klist_class_dev_get(struct klist_node *n)
+{
+	struct device *dev = container_of(n, struct device, knode_class);
+
+	get_device(dev);
+}
+
+static void klist_class_dev_put(struct klist_node *n)
+{
+	struct device *dev = container_of(n, struct device, knode_class);
+
+	put_device(dev);
+}
+
 int __class_register(struct class *cls, struct lock_class_key *key)
 {
 	struct class_private *cp;
@@ -145,7 +159,7 @@
 	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
-	INIT_LIST_HEAD(&cp->class_devices);
+	klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put);
 	INIT_LIST_HEAD(&cp->class_interfaces);
 	kset_init(&cp->class_dirs);
 	__mutex_init(&cp->class_mutex, "struct class mutex", key);
@@ -269,6 +283,71 @@
 #endif
 
 /**
+ * class_dev_iter_init - initialize class device iterator
+ * @iter: class iterator to initialize
+ * @class: the class we want to iterate over
+ * @start: the device to start iterating from, if any
+ * @type: device_type of the devices to iterate over, NULL for all
+ *
+ * Initialize class iterator @iter such that it iterates over devices
+ * of @class.  If @start is set, the list iteration will start there,
+ * otherwise if it is NULL, the iteration starts at the beginning of
+ * the list.
+ */
+void class_dev_iter_init(struct class_dev_iter *iter, struct class *class,
+			 struct device *start, const struct device_type *type)
+{
+	struct klist_node *start_knode = NULL;
+
+	if (start)
+		start_knode = &start->knode_class;
+	klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode);
+	iter->type = type;
+}
+EXPORT_SYMBOL_GPL(class_dev_iter_init);
+
+/**
+ * class_dev_iter_next - iterate to the next device
+ * @iter: class iterator to proceed
+ *
+ * Proceed @iter to the next device and return it.  Returns NULL if
+ * iteration is complete.
+ *
+ * The returned device is referenced and won't be released till
+ * iterator is advanced to the next device or exited.  The caller is
+ * free to do whatever it wants to do with the device including
+ * calling back into class code.
+ */
+struct device *class_dev_iter_next(struct class_dev_iter *iter)
+{
+	struct klist_node *knode;
+	struct device *dev;
+
+	while (1) {
+		knode = klist_next(&iter->ki);
+		if (!knode)
+			return NULL;
+		dev = container_of(knode, struct device, knode_class);
+		if (!iter->type || iter->type == dev->type)
+			return dev;
+	}
+}
+EXPORT_SYMBOL_GPL(class_dev_iter_next);
+
+/**
+ * class_dev_iter_exit - finish iteration
+ * @iter: class iterator to finish
+ *
+ * Finish an iteration.  Always call this function after iteration is
+ * complete whether the iteration ran till the end or not.
+ */
+void class_dev_iter_exit(struct class_dev_iter *iter)
+{
+	klist_iter_exit(&iter->ki);
+}
+EXPORT_SYMBOL_GPL(class_dev_iter_exit);
+
+/**
  * class_for_each_device - device iterator
  * @class: the class we're iterating
  * @start: the device to start with in the list, if any.
@@ -283,13 +362,13 @@
  * We check the return of @fn each time. If it returns anything
  * other than 0, we break out and return that value.
  *
- * Note, we hold class->class_mutex in this function, so it can not be
- * re-acquired in @fn, otherwise it will self-deadlocking. For
- * example, calls to add or remove class members would be verboten.
+ * @fn is allowed to do anything including calling back into class
+ * code.  There's no locking restriction.
  */
 int class_for_each_device(struct class *class, struct device *start,
 			  void *data, int (*fn)(struct device *, void *))
 {
+	struct class_dev_iter iter;
 	struct device *dev;
 	int error = 0;
 
@@ -301,20 +380,13 @@
 		return -EINVAL;
 	}
 
-	mutex_lock(&class->p->class_mutex);
-	list_for_each_entry(dev, &class->p->class_devices, node) {
-		if (start) {
-			if (start == dev)
-				start = NULL;
-			continue;
-		}
-		dev = get_device(dev);
+	class_dev_iter_init(&iter, class, start, NULL);
+	while ((dev = class_dev_iter_next(&iter))) {
 		error = fn(dev, data);
-		put_device(dev);
 		if (error)
 			break;
 	}
-	mutex_unlock(&class->p->class_mutex);
+	class_dev_iter_exit(&iter);
 
 	return error;
 }
@@ -337,16 +409,15 @@
  *
  * Note, you will need to drop the reference with put_device() after use.
  *
- * We hold class->class_mutex in this function, so it can not be
- * re-acquired in @match, otherwise it will self-deadlocking. For
- * example, calls to add or remove class members would be verboten.
+ * @match is allowed to do anything including calling back into class
+ * code.  There's no locking restriction.
  */
 struct device *class_find_device(struct class *class, struct device *start,
 				 void *data,
 				 int (*match)(struct device *, void *))
 {
+	struct class_dev_iter iter;
 	struct device *dev;
-	int found = 0;
 
 	if (!class)
 		return NULL;
@@ -356,29 +427,23 @@
 		return NULL;
 	}
 
-	mutex_lock(&class->p->class_mutex);
-	list_for_each_entry(dev, &class->p->class_devices, node) {
-		if (start) {
-			if (start == dev)
-				start = NULL;
-			continue;
-		}
-		dev = get_device(dev);
+	class_dev_iter_init(&iter, class, start, NULL);
+	while ((dev = class_dev_iter_next(&iter))) {
 		if (match(dev, data)) {
-			found = 1;
+			get_device(dev);
 			break;
-		} else
-			put_device(dev);
+		}
 	}
-	mutex_unlock(&class->p->class_mutex);
+	class_dev_iter_exit(&iter);
 
-	return found ? dev : NULL;
+	return dev;
 }
 EXPORT_SYMBOL_GPL(class_find_device);
 
 int class_interface_register(struct class_interface *class_intf)
 {
 	struct class *parent;
+	struct class_dev_iter iter;
 	struct device *dev;
 
 	if (!class_intf || !class_intf->class)
@@ -391,8 +456,10 @@
 	mutex_lock(&parent->p->class_mutex);
 	list_add_tail(&class_intf->node, &parent->p->class_interfaces);
 	if (class_intf->add_dev) {
-		list_for_each_entry(dev, &parent->p->class_devices, node)
+		class_dev_iter_init(&iter, parent, NULL, NULL);
+		while ((dev = class_dev_iter_next(&iter)))
 			class_intf->add_dev(dev, class_intf);
+		class_dev_iter_exit(&iter);
 	}
 	mutex_unlock(&parent->p->class_mutex);
 
@@ -402,6 +469,7 @@
 void class_interface_unregister(struct class_interface *class_intf)
 {
 	struct class *parent = class_intf->class;
+	struct class_dev_iter iter;
 	struct device *dev;
 
 	if (!parent)
@@ -410,8 +478,10 @@
 	mutex_lock(&parent->p->class_mutex);
 	list_del_init(&class_intf->node);
 	if (class_intf->remove_dev) {
-		list_for_each_entry(dev, &parent->p->class_devices, node)
+		class_dev_iter_init(&iter, parent, NULL, NULL);
+		while ((dev = class_dev_iter_next(&iter)))
 			class_intf->remove_dev(dev, class_intf);
+		class_dev_iter_exit(&iter);
 	}
 	mutex_unlock(&parent->p->class_mutex);
 

diff --git a/drivers/base/core.c b/drivers/base/core.c
index d021c98..b98cb14 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c

@@ -536,7 +536,6 @@
 	klist_init(&dev->klist_children, klist_children_get,
 		   klist_children_put);
 	INIT_LIST_HEAD(&dev->dma_pools);
-	INIT_LIST_HEAD(&dev->node);
 	init_MUTEX(&dev->sem);
 	spin_lock_init(&dev->devres_lock);
 	INIT_LIST_HEAD(&dev->devres_head);
@@ -916,7 +915,8 @@
 	if (dev->class) {
 		mutex_lock(&dev->class->p->class_mutex);
 		/* tie the class to the device */
-		list_add_tail(&dev->node, &dev->class->p->class_devices);
+		klist_add_tail(&dev->knode_class,
+			       &dev->class->p->class_devices);
 
 		/* notify any interfaces that the device is here */
 		list_for_each_entry(class_intf,
@@ -1032,7 +1032,7 @@
 			if (class_intf->remove_dev)
 				class_intf->remove_dev(dev, class_intf);
 		/* remove the device from the class list */
-		list_del_init(&dev->node);
+		klist_del(&dev->knode_class);
 		mutex_unlock(&dev->class->p->class_mutex);
 	}
 	device_remove_file(dev, &uevent_attr);

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 0c39782..aa69556 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c

@@ -109,12 +109,12 @@
 static int
 aoedisk_add_sysfs(struct aoedev *d)
 {
-	return sysfs_create_group(&d->gd->dev.kobj, &attr_group);
+	return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
 }
 void
 aoedisk_rm_sysfs(struct aoedev *d)
 {
-	sysfs_remove_group(&d->gd->dev.kobj, &attr_group);
+	sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
 }
 
 static int
@@ -276,7 +276,7 @@
 	gd->first_minor = d->sysminor * AOE_PARTITIONS;
 	gd->fops = &aoe_bdops;
 	gd->private_data = d;
-	gd->capacity = d->ssize;
+	set_capacity(gd, d->ssize);
 	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
 		d->aoemajor, d->aoeminor);
 

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 2f17462..961d29a 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c

@@ -645,7 +645,7 @@
 		unsigned long flags;
 		u64 ssize;
 
-		ssize = d->gd->capacity;
+		ssize = get_capacity(d->gd);
 		bd = bdget_disk(d->gd, 0);
 
 		if (bd) {
@@ -707,7 +707,7 @@
 	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
 		return;
 	if (d->gd != NULL) {
-		d->gd->capacity = ssize;
+		set_capacity(d->gd, ssize);
 		d->flags |= DEVFL_NEWSIZE;
 	} else
 		d->flags |= DEVFL_GDALLOC;
@@ -756,12 +756,17 @@
 	unsigned long n_sect = bio->bi_size >> 9;
 	const int rw = bio_data_dir(bio);
 	struct hd_struct *part;
+	int cpu;
 
-	part = get_part(disk, sector);
-	all_stat_inc(disk, part, ios[rw], sector);
-	all_stat_add(disk, part, ticks[rw], duration, sector);
-	all_stat_add(disk, part, sectors[rw], n_sect, sector);
-	all_stat_add(disk, part, io_ticks, duration, sector);
+	cpu = part_stat_lock();
+	part = disk_map_sector_rcu(disk, sector);
+
+	part_stat_inc(cpu, part, ios[rw]);
+	part_stat_add(cpu, part, ticks[rw], duration);
+	part_stat_add(cpu, part, sectors[rw], n_sect);
+	part_stat_add(cpu, part, io_ticks, duration);
+
+	part_stat_unlock();
 }
 
 void

diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index a1d813a..6a8038d 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c

@@ -91,7 +91,7 @@
 	}
 
 	if (d->gd)
-		d->gd->capacity = 0;
+		set_capacity(d->gd, 0);
 
 	d->flags &= ~DEVFL_UP;
 }

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index b73116e..1e1f915 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c

@@ -3460,8 +3460,8 @@
 	       hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
 
 	hba[i]->cmd_pool_bits =
-	    kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG -
-		      1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL);
+	    kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
+			* sizeof(unsigned long), GFP_KERNEL);
 	hba[i]->cmd_pool = (CommandList_struct *)
 	    pci_alloc_consistent(hba[i]->pdev,
 		    hba[i]->nr_cmds * sizeof(CommandList_struct),
@@ -3493,8 +3493,8 @@
 	/* command and error info recs zeroed out before
 	   they are used */
 	memset(hba[i]->cmd_pool_bits, 0,
-	       ((hba[i]->nr_cmds + BITS_PER_LONG -
-		 1) / BITS_PER_LONG) * sizeof(unsigned long));
+	       DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
+			* sizeof(unsigned long));
 
 	hba[i]->num_luns = 0;
 	hba[i]->highest_lun = -1;

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e1233aa..a3fd87b 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c

@@ -365,7 +365,7 @@
 
 static int 
 cciss_scsi_add_entry(int ctlr, int hostno, 
-		unsigned char *scsi3addr, int devtype,
+		struct cciss_scsi_dev_t *device,
 		struct scsi2map *added, int *nadded)
 {
 	/* assumes hba[ctlr]->scsi_ctlr->lock is held */ 
@@ -384,12 +384,12 @@
 	lun = 0;
 	/* Is this device a non-zero lun of a multi-lun device */
 	/* byte 4 of the 8-byte LUN addr will contain the logical unit no. */
-	if (scsi3addr[4] != 0) {
+	if (device->scsi3addr[4] != 0) {
 		/* Search through our list and find the device which */
 		/* has the same 8 byte LUN address, excepting byte 4. */
 		/* Assign the same bus and target for this new LUN. */
 		/* Use the logical unit number from the firmware. */
-		memcpy(addr1, scsi3addr, 8);
+		memcpy(addr1, device->scsi3addr, 8);
 		addr1[4] = 0;
 		for (i = 0; i < n; i++) {
 			sd = &ccissscsi[ctlr].dev[i];
@@ -399,7 +399,7 @@
 			if (memcmp(addr1, addr2, 8) == 0) {
 				bus = sd->bus;
 				target = sd->target;
-				lun = scsi3addr[4];
+				lun = device->scsi3addr[4];
 				break;
 			}
 		}
@@ -420,8 +420,12 @@
 	added[*nadded].lun = sd->lun;
 	(*nadded)++;
 
-	memcpy(&sd->scsi3addr[0], scsi3addr, 8);
-	sd->devtype = devtype;
+	memcpy(sd->scsi3addr, device->scsi3addr, 8);
+	memcpy(sd->vendor, device->vendor, sizeof(sd->vendor));
+	memcpy(sd->revision, device->revision, sizeof(sd->revision));
+	memcpy(sd->device_id, device->device_id, sizeof(sd->device_id));
+	sd->devtype = device->devtype;
+
 	ccissscsi[ctlr].ndevices++;
 
 	/* initially, (before registering with scsi layer) we don't 
@@ -487,6 +491,22 @@
 	CPQ_TAPE_UNLOCK(ctlr, flags);
 }
 
+static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
+	struct cciss_scsi_dev_t *dev2)
+{
+	return dev1->devtype == dev2->devtype &&
+		memcmp(dev1->scsi3addr, dev2->scsi3addr,
+			sizeof(dev1->scsi3addr)) == 0 &&
+		memcmp(dev1->device_id, dev2->device_id,
+			sizeof(dev1->device_id)) == 0 &&
+		memcmp(dev1->vendor, dev2->vendor,
+			sizeof(dev1->vendor)) == 0 &&
+		memcmp(dev1->model, dev2->model,
+			sizeof(dev1->model)) == 0 &&
+		memcmp(dev1->revision, dev2->revision,
+			sizeof(dev1->revision)) == 0;
+}
+
 static int
 adjust_cciss_scsi_table(int ctlr, int hostno,
 	struct cciss_scsi_dev_t sd[], int nsds)
@@ -532,7 +552,7 @@
 		for (j=0;j<nsds;j++) {
 			if (SCSI3ADDR_EQ(sd[j].scsi3addr,
 				csd->scsi3addr)) {
-				if (sd[j].devtype == csd->devtype)
+				if (device_is_the_same(&sd[j], csd))
 					found=2;
 				else
 					found=1;
@@ -548,22 +568,26 @@
 			cciss_scsi_remove_entry(ctlr, hostno, i,
 				removed, &nremoved);
 			/* remove ^^^, hence i not incremented */
-		} 
-		else if (found == 1) { /* device is different kind */
+		} else if (found == 1) { /* device is different in some way */
 			changes++;
-			printk("cciss%d: device c%db%dt%dl%d type changed "
-				"(device type now %s).\n",
-				ctlr, hostno, csd->bus, csd->target, csd->lun,
-					scsi_device_type(csd->devtype));
+			printk("cciss%d: device c%db%dt%dl%d has changed.\n",
+				ctlr, hostno, csd->bus, csd->target, csd->lun);
 			cciss_scsi_remove_entry(ctlr, hostno, i,
 				removed, &nremoved);
 			/* remove ^^^, hence i not incremented */
-			if (cciss_scsi_add_entry(ctlr, hostno,
-				&sd[j].scsi3addr[0], sd[j].devtype,
+			if (cciss_scsi_add_entry(ctlr, hostno, &sd[j],
 				added, &nadded) != 0)
 				/* we just removed one, so add can't fail. */
 					BUG();
 			csd->devtype = sd[j].devtype;
+			memcpy(csd->device_id, sd[j].device_id,
+				sizeof(csd->device_id));
+			memcpy(csd->vendor, sd[j].vendor,
+				sizeof(csd->vendor));
+			memcpy(csd->model, sd[j].model,
+				sizeof(csd->model));
+			memcpy(csd->revision, sd[j].revision,
+				sizeof(csd->revision));
 		} else 		/* device is same as it ever was, */
 			i++;	/* so just move along. */
 	}
@@ -577,7 +601,7 @@
 			csd = &ccissscsi[ctlr].dev[j];
 			if (SCSI3ADDR_EQ(sd[i].scsi3addr,
 				csd->scsi3addr)) {
-				if (sd[i].devtype == csd->devtype)
+				if (device_is_the_same(&sd[i], csd))
 					found=2;	/* found device */
 				else
 					found=1; 	/* found a bug. */
@@ -586,16 +610,14 @@
 		}
 		if (!found) {
 			changes++;
-			if (cciss_scsi_add_entry(ctlr, hostno, 
-
-				&sd[i].scsi3addr[0], sd[i].devtype,
+			if (cciss_scsi_add_entry(ctlr, hostno, &sd[i],
 				added, &nadded) != 0)
 				break;
 		} else if (found == 1) {
 			/* should never happen... */
 			changes++;
-			printk("cciss%d: device unexpectedly changed type\n",
-				ctlr);
+			printk(KERN_WARNING "cciss%d: device "
+				"unexpectedly changed\n", ctlr);
 			/* but if it does happen, we just ignore that device */
 		}
 	}
@@ -1012,7 +1034,8 @@
 
 static int
 cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, 
-		 unsigned char *buf, unsigned char bufsize)
+	unsigned char page, unsigned char *buf,
+	unsigned char bufsize)
 {
 	int rc;
 	CommandList_struct *cp;
@@ -1032,8 +1055,8 @@
 	ei = cp->err_info; 
 
 	cdb[0] = CISS_INQUIRY;
-	cdb[1] = 0;
-	cdb[2] = 0;
+	cdb[1] = (page != 0);
+	cdb[2] = page;
 	cdb[3] = 0;
 	cdb[4] = bufsize;
 	cdb[5] = 0;
@@ -1053,6 +1076,25 @@
 	return rc;	
 }
 
+/* Get the device id from inquiry page 0x83 */
+static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr,
+	unsigned char *device_id, int buflen)
+{
+	int rc;
+	unsigned char *buf;
+
+	if (buflen > 16)
+		buflen = 16;
+	buf = kzalloc(64, GFP_KERNEL);
+	if (!buf)
+		return -1;
+	rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64);
+	if (rc == 0)
+		memcpy(device_id, &buf[8], buflen);
+	kfree(buf);
+	return rc != 0;
+}
+
 static int
 cciss_scsi_do_report_phys_luns(ctlr_info_t *c, 
 		ReportLunData_struct *buf, int bufsize)
@@ -1142,25 +1184,21 @@
 	ctlr_info_t *c;
 	__u32 num_luns=0;
 	unsigned char *ch;
-	/* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */
-	struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
+	struct cciss_scsi_dev_t *currentsd, *this_device;
 	int ncurrent=0;
 	int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8;
 	int i;
 
 	c = (ctlr_info_t *) hba[cntl_num];	
 	ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
-	if (ld_buff == NULL) {
-		printk(KERN_ERR "cciss: out of memory\n");
-		return;
-	}
 	inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
-        if (inq_buff == NULL) {
-                printk(KERN_ERR "cciss: out of memory\n");
-                kfree(ld_buff);
-                return;
+	currentsd = kzalloc(sizeof(*currentsd) *
+			(CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL);
+	if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) {
+		printk(KERN_ERR "cciss: out of memory\n");
+		goto out;
 	}
-
+	this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
 	if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) {
 		ch = &ld_buff->LUNListLength[0];
 		num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
@@ -1179,23 +1217,34 @@
 
 
 	/* adjust our table of devices */	
-	for(i=0; i<num_luns; i++)
-	{
-		int devtype;
-
+	for (i = 0; i < num_luns; i++) {
 		/* for each physical lun, do an inquiry */
 		if (ld_buff->LUN[i][3] & 0xC0) continue;
 		memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
 		memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);
 
-		if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff,
-			(unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
+		if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff,
+			(unsigned char) OBDR_TAPE_INQ_SIZE) != 0)
 			/* Inquiry failed (msg printed already) */
-			devtype = 0; /* so we will skip this device. */
-		} else /* what kind of device is this? */
-			devtype = (inq_buff[0] & 0x1f);
+			continue; /* so we will skip this device. */
 
-		switch (devtype)
+		this_device->devtype = (inq_buff[0] & 0x1f);
+		this_device->bus = -1;
+		this_device->target = -1;
+		this_device->lun = -1;
+		memcpy(this_device->scsi3addr, scsi3addr, 8);
+		memcpy(this_device->vendor, &inq_buff[8],
+			sizeof(this_device->vendor));
+		memcpy(this_device->model, &inq_buff[16],
+			sizeof(this_device->model));
+		memcpy(this_device->revision, &inq_buff[32],
+			sizeof(this_device->revision));
+		memset(this_device->device_id, 0,
+			sizeof(this_device->device_id));
+		cciss_scsi_get_device_id(hba[cntl_num], scsi3addr,
+			this_device->device_id, sizeof(this_device->device_id));
+
+		switch (this_device->devtype)
 		{
 		  case 0x05: /* CD-ROM */ {
 
@@ -1220,15 +1269,10 @@
 			if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
 				printk(KERN_INFO "cciss%d: %s ignored, "
 					"too many devices.\n", cntl_num,
-					scsi_device_type(devtype));
+					scsi_device_type(this_device->devtype));
 				break;
 			}
-			memcpy(&currentsd[ncurrent].scsi3addr[0], 
-				&scsi3addr[0], 8);
-			currentsd[ncurrent].devtype = devtype;
-			currentsd[ncurrent].bus = -1;
-			currentsd[ncurrent].target = -1;
-			currentsd[ncurrent].lun = -1;
+			currentsd[ncurrent] = *this_device;
 			ncurrent++;
 			break;
 		  default: 
@@ -1240,6 +1284,7 @@
 out:
 	kfree(inq_buff);
 	kfree(ld_buff);
+	kfree(currentsd);
 	return;
 }
 

diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h
index d9c2c58..7b75024 100644
--- a/drivers/block/cciss_scsi.h
+++ b/drivers/block/cciss_scsi.h

@@ -66,6 +66,10 @@
 	int devtype;
 	int bus, target, lun;		/* as presented to the OS */
 	unsigned char scsi3addr[8];	/* as presented to the HW */
+	unsigned char device_id[16];	/* from inquiry pg. 0x83 */
+	unsigned char vendor[8];	/* bytes 8-15 of inquiry data */
+	unsigned char model[16];	/* bytes 16-31 of inquiry data */
+	unsigned char revision[4];	/* bytes 32-35 of inquiry data */
 };
 
 struct cciss_scsi_hba_t {

diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 09c1434..3d96752 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c

@@ -424,7 +424,7 @@
 		hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t),
 		&(hba[i]->cmd_pool_dhandle));
 	hba[i]->cmd_pool_bits = kcalloc(
-		(NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long),
+		DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long),
 		GFP_KERNEL);
 
 	if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool)

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 395f8ea..cf64ddf 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c

@@ -423,8 +423,15 @@
  * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical
  * side 0 is on physical side 0 (but with the misnamed sector IDs).
  * 'stretch' should probably be renamed to something more general, like
- * 'options'.  Other parameters should be self-explanatory (see also
- * setfdprm(8)).
+ * 'options'.
+ *
+ * Bits 2 through 9 of 'stretch' tell the number of the first sector.
+ * The LSB (bit 2) is flipped. For most disks, the first sector
+ * is 1 (represented by 0x00<<2).  For some CP/M and music sampler
+ * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2).
+ * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2).
+ *
+ * Other parameters should be self-explanatory (see also setfdprm(8)).
  */
 /*
 	    Size
@@ -1355,20 +1362,20 @@
 	}
 
 	/* Convert step rate from microseconds to milliseconds and 4 bits */
-	srt = 16 - (DP->srt * scale_dtr / 1000 + NOMINAL_DTR - 1) / NOMINAL_DTR;
+	srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR);
 	if (slow_floppy) {
 		srt = srt / 4;
 	}
 	SUPBOUND(srt, 0xf);
 	INFBOUND(srt, 0);
 
-	hlt = (DP->hlt * scale_dtr / 2 + NOMINAL_DTR - 1) / NOMINAL_DTR;
+	hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR);
 	if (hlt < 0x01)
 		hlt = 0x01;
 	else if (hlt > 0x7f)
 		hlt = hlt_max_code;
 
-	hut = (DP->hut * scale_dtr / 16 + NOMINAL_DTR - 1) / NOMINAL_DTR;
+	hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR);
 	if (hut < 0x1)
 		hut = 0x1;
 	else if (hut > 0xf)
@@ -2236,9 +2243,9 @@
 			}
 		}
 	}
-	if (_floppy->stretch & FD_ZEROBASED) {
+	if (_floppy->stretch & FD_SECTBASEMASK) {
 		for (count = 0; count < F_SECT_PER_TRACK; count++)
-			here[count].sect--;
+			here[count].sect += FD_SECTBASE(_floppy) - 1;
 	}
 }
 
@@ -2385,7 +2392,7 @@
 
 #ifdef FLOPPY_SANITY_CHECK
 	if (nr_sectors / ssize >
-	    (in_sector_offset + current_count_sectors + ssize - 1) / ssize) {
+	    DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) {
 		DPRINT("long rw: %x instead of %lx\n",
 		       nr_sectors, current_count_sectors);
 		printk("rs=%d s=%d\n", R_SECTOR, SECTOR);
@@ -2649,7 +2656,7 @@
 	}
 	HEAD = fsector_t / _floppy->sect;
 
-	if (((_floppy->stretch & (FD_SWAPSIDES | FD_ZEROBASED)) ||
+	if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) ||
 	     TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect)
 		max_sector = _floppy->sect;
 
@@ -2679,7 +2686,7 @@
 	CODE2SIZE;
 	SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE;
 	SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) +
-	    ((_floppy->stretch & FD_ZEROBASED) ? 0 : 1);
+	    FD_SECTBASE(_floppy);
 
 	/* tracksize describes the size which can be filled up with sectors
 	 * of size ssize.
@@ -3311,7 +3318,7 @@
 	    g->head <= 0 ||
 	    g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) ||
 	    /* check if reserved bits are set */
-	    (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_ZEROBASED)) != 0)
+	    (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0)
 		return -EINVAL;
 	if (type) {
 		if (!capable(CAP_SYS_ADMIN))
@@ -3356,7 +3363,7 @@
 		if (DRS->maxblock > user_params[drive].sect ||
 		    DRS->maxtrack ||
 		    ((user_params[drive].sect ^ oldStretch) &
-		     (FD_SWAPSIDES | FD_ZEROBASED)))
+		     (FD_SWAPSIDES | FD_SECTBASEMASK)))
 			invalidate_drive(bdev);
 		else
 			process_fd_request();

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1778e4a..7b33512 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c

@@ -403,7 +403,7 @@
 	BUG_ON(lo->magic != LO_MAGIC);
 
 	lo->pid = current->pid;
-	ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr);
+	ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
 	if (ret) {
 		printk(KERN_ERR "nbd: sysfs_create_file failed!");
 		return ret;
@@ -412,7 +412,7 @@
 	while ((req = nbd_read_stat(lo)) != NULL)
 		nbd_end_request(req);
 
-	sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr);
+	sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
 	return 0;
 }
 

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 29b7a64..0e07715 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c

@@ -2544,7 +2544,7 @@
 		if (last_zone != zone) {
 			BUG_ON(last_zone != zone + pd->settings.size);
 			first_sectors = last_zone - bio->bi_sector;
-			bp = bio_split(bio, bio_split_pool, first_sectors);
+			bp = bio_split(bio, first_sectors);
 			BUG_ON(!bp);
 			pkt_make_request(q, &bp->bio1);
 			pkt_make_request(q, &bp->bio2);
@@ -2911,7 +2911,7 @@
 	if (!disk->queue)
 		goto out_mem2;
 
-	pd->pkt_dev = MKDEV(disk->major, disk->first_minor);
+	pd->pkt_dev = MKDEV(pktdev_major, idx);
 	ret = pkt_new_dev(pd, dev);
 	if (ret)
 		goto out_new_dev;

diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index d797e20..936466f 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c

@@ -199,7 +199,8 @@
 		if (blk_fs_request(req)) {
 			if (ps3disk_submit_request_sg(dev, req))
 				break;
-		} else if (req->cmd_type == REQ_TYPE_FLUSH) {
+		} else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+			   req->cmd[0] == REQ_LB_OP_FLUSH) {
 			if (ps3disk_submit_flush_request(dev, req))
 				break;
 		} else {
@@ -257,7 +258,8 @@
 		return IRQ_HANDLED;
 	}
 
-	if (req->cmd_type == REQ_TYPE_FLUSH) {
+	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	    req->cmd[0] == REQ_LB_OP_FLUSH) {
 		read = 0;
 		num_sectors = req->hard_cur_sectors;
 		op = "flush";
@@ -405,7 +407,8 @@
 
 	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
-	req->cmd_type = REQ_TYPE_FLUSH;
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
+	req->cmd[0] = REQ_LB_OP_FLUSH;
 }
 
 static unsigned long ps3disk_mask;
@@ -538,7 +541,7 @@
 	struct ps3disk_private *priv = dev->sbd.core.driver_data;
 
 	mutex_lock(&ps3disk_mask_mutex);
-	__clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS,
+	__clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS,
 		    &ps3disk_mask);
 	mutex_unlock(&ps3disk_mask_mutex);
 	del_gendisk(priv->gendisk);

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4225109..6ec5fc0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c

@@ -47,20 +47,20 @@
 
 	spin_lock_irqsave(&vblk->lock, flags);
 	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
-		int uptodate;
+		int error;
 		switch (vbr->status) {
 		case VIRTIO_BLK_S_OK:
-			uptodate = 1;
+			error = 0;
 			break;
 		case VIRTIO_BLK_S_UNSUPP:
-			uptodate = -ENOTTY;
+			error = -ENOTTY;
 			break;
 		default:
-			uptodate = 0;
+			error = -EIO;
 			break;
 		}
 
-		end_dequeued_request(vbr->req, uptodate);
+		__blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req));
 		list_del(&vbr->list);
 		mempool_free(vbr, vblk->pool);
 	}
@@ -84,11 +84,11 @@
 	if (blk_fs_request(vbr->req)) {
 		vbr->out_hdr.type = 0;
 		vbr->out_hdr.sector = vbr->req->sector;
-		vbr->out_hdr.ioprio = vbr->req->ioprio;
+		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 	} else if (blk_pc_request(vbr->req)) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
 		vbr->out_hdr.sector = 0;
-		vbr->out_hdr.ioprio = vbr->req->ioprio;
+		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 	} else {
 		/* We don't put anything else in the queue. */
 		BUG();

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3ca643c..bff602c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c

@@ -105,15 +105,17 @@
 #define GRANT_INVALID_REF	0
 
 #define PARTS_PER_DISK		16
+#define PARTS_PER_EXT_DISK      256
 
 #define BLKIF_MAJOR(dev) ((dev)>>8)
 #define BLKIF_MINOR(dev) ((dev) & 0xff)
 
-#define DEV_NAME	"xvd"	/* name in /dev */
+#define EXT_SHIFT 28
+#define EXTENDED (1<<EXT_SHIFT)
+#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
+#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
 
-/* Information about our VBDs. */
-#define MAX_VBDS 64
-static LIST_HEAD(vbds_list);
+#define DEV_NAME	"xvd"	/* name in /dev */
 
 static int get_id_from_freelist(struct blkfront_info *info)
 {
@@ -386,31 +388,60 @@
 }
 
 
-static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
-			       int vdevice, u16 vdisk_info, u16 sector_size,
-			       struct blkfront_info *info)
+static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+			       struct blkfront_info *info,
+			       u16 vdisk_info, u16 sector_size)
 {
 	struct gendisk *gd;
 	int nr_minors = 1;
 	int err = -ENODEV;
+	unsigned int offset;
+	int minor;
+	int nr_parts;
 
 	BUG_ON(info->gd != NULL);
 	BUG_ON(info->rq != NULL);
 
-	if ((minor % PARTS_PER_DISK) == 0)
-		nr_minors = PARTS_PER_DISK;
+	if ((info->vdevice>>EXT_SHIFT) > 1) {
+		/* this is above the extended range; something is wrong */
+		printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
+		return -ENODEV;
+	}
+
+	if (!VDEV_IS_EXTENDED(info->vdevice)) {
+		minor = BLKIF_MINOR(info->vdevice);
+		nr_parts = PARTS_PER_DISK;
+	} else {
+		minor = BLKIF_MINOR_EXT(info->vdevice);
+		nr_parts = PARTS_PER_EXT_DISK;
+	}
+
+	if ((minor % nr_parts) == 0)
+		nr_minors = nr_parts;
 
 	gd = alloc_disk(nr_minors);
 	if (gd == NULL)
 		goto out;
 
-	if (nr_minors > 1)
-		sprintf(gd->disk_name, "%s%c", DEV_NAME,
-			'a' + minor / PARTS_PER_DISK);
-	else
-		sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
-			'a' + minor / PARTS_PER_DISK,
-			minor % PARTS_PER_DISK);
+	offset = minor / nr_parts;
+
+	if (nr_minors > 1) {
+		if (offset < 26)
+			sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
+		else
+			sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
+				'a' + ((offset / 26)-1), 'a' + (offset % 26));
+	} else {
+		if (offset < 26)
+			sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
+				'a' + offset,
+				minor & (nr_parts - 1));
+		else
+			sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
+				'a' + ((offset / 26) - 1),
+				'a' + (offset % 26),
+				minor & (nr_parts - 1));
+	}
 
 	gd->major = XENVBD_MAJOR;
 	gd->first_minor = minor;
@@ -699,8 +730,13 @@
 	err = xenbus_scanf(XBT_NIL, dev->nodename,
 			   "virtual-device", "%i", &vdevice);
 	if (err != 1) {
-		xenbus_dev_fatal(dev, err, "reading virtual-device");
-		return err;
+		/* go looking in the extended area instead */
+		err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
+				   "%i", &vdevice);
+		if (err != 1) {
+			xenbus_dev_fatal(dev, err, "reading virtual-device");
+			return err;
+		}
 	}
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -861,9 +897,7 @@
 	if (err)
 		info->feature_barrier = 0;
 
-	err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
-				  sectors, info->vdevice,
-				  binfo, sector_size, info);
+	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 				 info->xbdev->otherend);

diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 1e55a65..32f3a8e 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c

@@ -256,7 +256,6 @@
 		BT_ERR("%s urb %p submission failed (%d)",
 						hdev->name, urb, -err);
 		usb_unanchor_urb(urb);
-		kfree(buf);
 	}
 
 	usb_free_urb(urb);
@@ -298,7 +297,6 @@
 		BT_ERR("%s urb %p submission failed (%d)",
 						hdev->name, urb, -err);
 		usb_unanchor_urb(urb);
-		kfree(buf);
 	}
 
 	usb_free_urb(urb);

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 29ae998..af472e0 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c

@@ -102,6 +102,7 @@
 	{ USB_DEVICE(0x0a5c, 0x2101), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
 
 	/* Broadcom BCM2046 */
+	{ USB_DEVICE(0x0a5c, 0x2146), .driver_info = BTUSB_RESET },
 	{ USB_DEVICE(0x0a5c, 0x2151), .driver_info = BTUSB_RESET },
 
 	/* Apple MacBook Pro with Broadcom chip */
@@ -113,6 +114,7 @@
 
 	/* Targus ACB10US */
 	{ USB_DEVICE(0x0a5c, 0x2100), .driver_info = BTUSB_RESET },
+	{ USB_DEVICE(0x0a5c, 0x2154), .driver_info = BTUSB_RESET },
 
 	/* ANYCOM Bluetooth USB-200 and USB-250 */
 	{ USB_DEVICE(0x0a5c, 0x2111), .driver_info = BTUSB_RESET },
@@ -150,6 +152,9 @@
 	{ USB_DEVICE(0x050d, 0x0012), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
 	{ USB_DEVICE(0x050d, 0x0013), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU },
 
+	/* Belkin F8T016 device */
+	{ USB_DEVICE(0x050d, 0x016a), .driver_info = BTUSB_RESET },
+
 	/* Digianswer devices */
 	{ USB_DEVICE(0x08fd, 0x0001), .driver_info = BTUSB_DIGIANSWER },
 	{ USB_DEVICE(0x08fd, 0x0002), .driver_info = BTUSB_IGNORE },
@@ -271,7 +276,6 @@
 		BT_ERR("%s urb %p submission failed (%d)",
 						hdev->name, urb, -err);
 		usb_unanchor_urb(urb);
-		kfree(buf);
 	}
 
 	usb_free_urb(urb);
@@ -354,7 +358,6 @@
 		BT_ERR("%s urb %p submission failed (%d)",
 						hdev->name, urb, -err);
 		usb_unanchor_urb(urb);
-		kfree(buf);
 	}
 
 	usb_free_urb(urb);
@@ -475,7 +478,6 @@
 		BT_ERR("%s urb %p submission failed (%d)",
 						hdev->name, urb, -err);
 		usb_unanchor_urb(urb);
-		kfree(buf);
 	}
 
 	usb_free_urb(urb);

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 74031de..d47f2f8 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c

@@ -2097,7 +2097,7 @@
 
 		len = nr * CD_FRAMESIZE_RAW;
 
-		ret = blk_rq_map_user(q, rq, ubuf, len);
+		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret)
 			break;
 

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1231d95..d6ba77a 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c

@@ -624,14 +624,14 @@
 		ctrl_outb(1, GDROM_DMA_STATUS_REG);
 		wait_event_interruptible_timeout(request_queue,
 			gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
-		err = gd.transfer;
+		err = gd.transfer ? -EIO : 0;
 		gd.transfer = 0;
 		gd.pending = 0;
 		/* now seek to take the request spinlock
 		* before handling ending the request */
 		spin_lock(&gdrom_lock);
 		list_del_init(&req->queuelist);
-		end_dequeued_request(req, 1 - err);
+		__blk_end_request(req, err, blk_rq_bytes(req));
 	}
 	spin_unlock(&gdrom_lock);
 	kfree(read_command);

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 7ce1ac4..6af435b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c

@@ -661,10 +661,10 @@
 	if (!disk || !disk->random)
 		return;
 	/* first major is 1, so we get >= 0x200 here */
-	DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor);
+	DEBUG_ENT("disk event %d:%d\n",
+		  MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)));
 
-	add_timer_randomness(disk->random,
-			     0x100 + MKDEV(disk->major, disk->first_minor));
+	add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
 }
 #endif
 

diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 94df917..0778d99 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c

@@ -364,7 +364,7 @@
 	int i;
 
 	status_block = dma_readl(dw, RAW.BLOCK);
-	status_xfer = dma_readl(dw, RAW.BLOCK);
+	status_xfer = dma_readl(dw, RAW.XFER);
 	status_err = dma_readl(dw, RAW.ERROR);
 
 	dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",

diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c
index d568c65..d9e7a49 100644
--- a/drivers/hwmon/abituguru3.c
+++ b/drivers/hwmon/abituguru3.c

@@ -279,7 +279,7 @@
 		{ "OTES1 Fan",		36, 2, 60, 1, 0 },
 		{ NULL, 0, 0, 0, 0, 0 } }
 	},
-	{ 0x0011, NULL /* Abit AT8 32X, need DMI string */, {
+	{ 0x0011, "AT8 32X(ATI RD580-ULI M1575)", {
 		{ "CPU Core",		 0, 0, 10, 1, 0 },
 		{ "DDR",		 1, 0, 20, 1, 0 },
 		{ "DDR VTT",		 2, 0, 10, 1, 0 },
@@ -303,6 +303,7 @@
 		{ "SYS Fan",		34, 2, 60, 1, 0 },
 		{ "AUX1 Fan",		35, 2, 60, 1, 0 },
 		{ "AUX2 Fan",		36, 2, 60, 1, 0 },
+		{ "AUX3 Fan",		37, 2, 60, 1, 0 },
 		{ NULL, 0, 0, 0, 0, 0 } }
 	},
 	{ 0x0012, NULL /* Abit AN8 32X, need DMI string */, {

diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index f113308..d793cc0 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c

@@ -46,6 +46,8 @@
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/sysfs.h>
+#include <linux/string.h>
+#include <linux/dmi.h>
 #include <asm/io.h>
 
 #define DRVNAME "it87"
@@ -236,6 +238,8 @@
 	/* Values read from Super-I/O config space */
 	u8 revision;
 	u8 vid_value;
+	/* Values set based on DMI strings */
+	u8 skip_pwm;
 };
 
 /* For each registered chip, we need to keep some data in memory.
@@ -964,6 +968,7 @@
 {
 	int err = -ENODEV;
 	u16 chip_type;
+	const char *board_vendor, *board_name;
 
 	superio_enter();
 	chip_type = force_id ? force_id : superio_inw(DEVID);
@@ -1022,6 +1027,24 @@
 			pr_info("it87: in7 is VCCH (+5V Stand-By)\n");
 	}
 
+	/* Disable specific features based on DMI strings */
+	board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+	board_name = dmi_get_system_info(DMI_BOARD_NAME);
+	if (board_vendor && board_name) {
+		if (strcmp(board_vendor, "nVIDIA") == 0
+		 && strcmp(board_name, "FN68PT") == 0) {
+			/* On the Shuttle SN68PT, FAN_CTL2 is apparently not
+			   connected to a fan, but to something else. One user
+			   has reported instant system power-off when changing
+			   the PWM2 duty cycle, so we disable it.
+			   I use the board name string as the trigger in case
+			   the same board is ever used in other systems. */
+			pr_info("it87: Disabling pwm2 due to "
+				"hardware constraints\n");
+			sio_data->skip_pwm = (1 << 1);
+		}
+	}
+
 exit:
 	superio_exit();
 	return err;
@@ -1168,25 +1191,33 @@
 	}
 
 	if (enable_pwm_interface) {
-		if ((err = device_create_file(dev,
-		     &sensor_dev_attr_pwm1_enable.dev_attr))
-		 || (err = device_create_file(dev,
-		     &sensor_dev_attr_pwm2_enable.dev_attr))
-		 || (err = device_create_file(dev,
-		     &sensor_dev_attr_pwm3_enable.dev_attr))
-		 || (err = device_create_file(dev,
-		     &sensor_dev_attr_pwm1.dev_attr))
-		 || (err = device_create_file(dev,
-		     &sensor_dev_attr_pwm2.dev_attr))
-		 || (err = device_create_file(dev,
-		     &sensor_dev_attr_pwm3.dev_attr))
-		 || (err = device_create_file(dev,
-		     &dev_attr_pwm1_freq))
-		 || (err = device_create_file(dev,
-		     &dev_attr_pwm2_freq))
-		 || (err = device_create_file(dev,
-		     &dev_attr_pwm3_freq)))
-			goto ERROR4;
+		if (!(sio_data->skip_pwm & (1 << 0))) {
+			if ((err = device_create_file(dev,
+			     &sensor_dev_attr_pwm1_enable.dev_attr))
+			 || (err = device_create_file(dev,
+			     &sensor_dev_attr_pwm1.dev_attr))
+			 || (err = device_create_file(dev,
+			     &dev_attr_pwm1_freq)))
+				goto ERROR4;
+		}
+		if (!(sio_data->skip_pwm & (1 << 1))) {
+			if ((err = device_create_file(dev,
+			     &sensor_dev_attr_pwm2_enable.dev_attr))
+			 || (err = device_create_file(dev,
+			     &sensor_dev_attr_pwm2.dev_attr))
+			 || (err = device_create_file(dev,
+			     &dev_attr_pwm2_freq)))
+				goto ERROR4;
+		}
+		if (!(sio_data->skip_pwm & (1 << 2))) {
+			if ((err = device_create_file(dev,
+			     &sensor_dev_attr_pwm3_enable.dev_attr))
+			 || (err = device_create_file(dev,
+			     &sensor_dev_attr_pwm3.dev_attr))
+			 || (err = device_create_file(dev,
+			     &dev_attr_pwm3_freq)))
+				goto ERROR4;
+		}
 	}
 
 	if (data->type == it8712 || data->type == it8716
@@ -1546,6 +1577,7 @@
 	unsigned short isa_address=0;
 	struct it87_sio_data sio_data;
 
+	memset(&sio_data, 0, sizeof(struct it87_sio_data));
 	err = it87_find(&isa_address, &sio_data);
 	if (err)
 		return err;

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 8e93a79..052879a 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig

@@ -780,10 +780,6 @@
 	  to transfer data to and from memory.  Saying Y is safe and improves
 	  performance.
 
-config BLK_DEV_IDE_SWARM
-	tristate "IDE for Sibyte evaluation boards"
-	depends on SIBYTE_SB1xxx_SOC
-
 config BLK_DEV_IDE_AU1XXX
        bool "IDE for AMD Alchemy Au1200"
        depends on SOC_AU1200

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 49a8c58..03c2cb6 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c

@@ -1113,7 +1113,7 @@
 
 	if (write) {
 		/* disk has become write protected */
-		if (cd->disk->policy) {
+		if (get_disk_ro(cd->disk)) {
 			cdrom_end_request(drive, 0);
 			return ide_stopped;
 		}
@@ -1661,7 +1661,9 @@
 		cdi->mask &= ~CDC_PLAY_AUDIO;
 
 	mechtype = buf[8 + 6] >> 5;
-	if (mechtype == mechtype_caddy || mechtype == mechtype_popup)
+	if (mechtype == mechtype_caddy ||
+	    mechtype == mechtype_popup ||
+	    (drive->atapi_flags & IDE_AFLAG_NO_AUTOCLOSE))
 		cdi->mask |= CDC_CLOSE_TRAY;
 
 	if (cdi->sanyo_slot > 0) {
@@ -1859,6 +1861,8 @@
 	{ "MATSHITADVD-ROM SR-8176", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
 	{ "MATSHITADVD-ROM SR-8174", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
 	{ "Optiarc DVD RW AD-5200A", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
+	{ "Optiarc DVD RW AD-7200A", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
+	{ "Optiarc DVD RW AD-7543A", NULL,   IDE_AFLAG_NO_AUTOCLOSE	     },
 	{ NULL, NULL, 0 }
 };
 

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 07ef88b..33ea8c0 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c

@@ -41,6 +41,12 @@
 #include <asm/io.h>
 #include <asm/div64.h>
 
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
+#define IDE_DISK_MINORS		(1 << PARTN_BITS)
+#else
+#define IDE_DISK_MINORS		0
+#endif
+
 struct ide_disk_obj {
 	ide_drive_t	*drive;
 	ide_driver_t	*driver;
@@ -1151,8 +1157,7 @@
 	if (!idkp)
 		goto failed;
 
-	g = alloc_disk_node(1 << PARTN_BITS,
-			hwif_to_node(drive->hwif));
+	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
 	if (!g)
 		goto out_free_idkp;
 
@@ -1178,9 +1183,11 @@
 	} else
 		drive->attach = 1;
 
-	g->minors = 1 << PARTN_BITS;
+	g->minors = IDE_DISK_MINORS;
 	g->driverfs_dev = &drive->gendev;
-	g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0;
+	g->flags |= GENHD_FL_EXT_DEVT;
+	if (drive->removable)
+		g->flags |= GENHD_FL_REMOVABLE;
 	set_capacity(g, idedisk_capacity(drive));
 	g->fops = &idedisk_ops;
 	add_disk(g);

diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index adc6827..3fa07c0 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c

@@ -211,7 +211,7 @@
 				xcount = bcount & 0xffff;
 				if (is_trm290)
 					xcount = ((xcount >> 2) - 1) << 16;
-				if (xcount == 0x0000) {
+				else if (xcount == 0x0000) {
 	/* 
 	 * Most chipsets correctly interpret a length of 0x0000 as 64KB,
 	 * but at least one (e.g. CS5530) misinterprets it as zero (!).

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 994e410..70aa86c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c

@@ -1188,7 +1188,7 @@
 {
 	struct gendisk *p = data;
 	*part &= (1 << PARTN_BITS) - 1;
-	return &p->dev.kobj;
+	return &disk_to_dev(p)->kobj;
 }
 
 static int exact_lock(dev_t dev, void *data)
@@ -1492,7 +1492,7 @@
 
 static int ide_sysfs_register_port(ide_hwif_t *hwif)
 {
-	int i, rc;
+	int i, uninitialized_var(rc);
 
 	for (i = 0; ide_port_attrs[i]; i++) {
 		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);

diff --git a/drivers/ide/mips/Makefile b/drivers/ide/mips/Makefile
index 677c7b2..5873fa0 100644
--- a/drivers/ide/mips/Makefile
+++ b/drivers/ide/mips/Makefile

@@ -1,4 +1,3 @@
-obj-$(CONFIG_BLK_DEV_IDE_SWARM)		+= swarm.o
 obj-$(CONFIG_BLK_DEV_IDE_AU1XXX)	+= au1xxx-ide.o
 
 EXTRA_CFLAGS    := -Idrivers/ide

diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c
deleted file mode 100644
index 39c9ee9..0000000
--- a/drivers/ide/mips/swarm.c
+++ /dev/null

@@ -1,197 +0,0 @@
-/*
- * Copyright (C) 2001, 2002, 2003 Broadcom Corporation
- * Copyright (C) 2004 MontaVista Software Inc.
- *	Author:	Manish Lachwani, mlachwani@mvista.com
- * Copyright (C) 2004  MIPS Technologies, Inc.  All rights reserved.
- *	Author: Maciej W. Rozycki <macro@mips.com>
- * Copyright (c) 2006, 2008  Maciej W. Rozycki
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- */
-
-/*
- *  Derived loosely from ide-pmac.c, so:
- *  Copyright (C) 1998 Paul Mackerras.
- *  Copyright (C) 1995-1998 Mark Lord
- */
-
-/*
- * Boards with SiByte processors so far have supported IDE devices via
- * the Generic Bus, PCI bus, and built-in PCMCIA interface.  In all
- * cases, byte-swapping must be avoided for these devices (whereas
- * other PCI devices, for example, will require swapping).  Any
- * SiByte-targetted kernel including IDE support will include this
- * file.  Probing of a Generic Bus for an IDE device is controlled by
- * the definition of "SIBYTE_HAVE_IDE", which is provided by
- * <asm/sibyte/board.h> for Broadcom boards.
- */
-
-#include <linux/ide.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/platform_device.h>
-
-#include <asm/io.h>
-
-#include <asm/sibyte/board.h>
-#include <asm/sibyte/sb1250_genbus.h>
-#include <asm/sibyte/sb1250_regs.h>
-
-#define DRV_NAME "ide-swarm"
-
-static char swarm_ide_string[] = DRV_NAME;
-
-static struct resource swarm_ide_resource = {
-	.name	= "SWARM GenBus IDE",
-	.flags	= IORESOURCE_MEM,
-};
-
-static struct platform_device *swarm_ide_dev;
-
-static const struct ide_port_info swarm_port_info = {
-	.name			= DRV_NAME,
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-};
-
-/*
- * swarm_ide_probe - if the board header indicates the existence of
- * Generic Bus IDE, allocate a HWIF for it.
- */
-static int __devinit swarm_ide_probe(struct device *dev)
-{
-	u8 __iomem *base;
-	struct ide_host *host;
-	phys_t offset, size;
-	int i, rc;
-	hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL };
-
-	if (!SIBYTE_HAVE_IDE)
-		return -ENODEV;
-
-	base = ioremap(A_IO_EXT_BASE, 0x800);
-	offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS));
-	size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS));
-	iounmap(base);
-
-	offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE;
-	size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE;
-	if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) {
-		printk(KERN_INFO DRV_NAME
-		       ": IDE interface at GenBus disabled\n");
-		return -EBUSY;
-	}
-
-	printk(KERN_INFO DRV_NAME ": IDE interface at GenBus slot %i\n",
-	       IDE_CS);
-
-	swarm_ide_resource.start = offset;
-	swarm_ide_resource.end = offset + size - 1;
-	if (request_resource(&iomem_resource, &swarm_ide_resource)) {
-		printk(KERN_ERR DRV_NAME
-		       ": can't request I/O memory resource\n");
-		return -EBUSY;
-	}
-
-	base = ioremap(offset, size);
-
-	memset(&hw, 0, sizeof(hw));
-	for (i = 0; i <= 7; i++)
-		hw.io_ports_array[i] =
-				(unsigned long)(base + ((0x1f0 + i) << 5));
-	hw.io_ports.ctl_addr =
-				(unsigned long)(base + (0x3f6 << 5));
-	hw.irq = K_INT_GB_IDE;
-	hw.chipset = ide_generic;
-
-	rc = ide_host_add(&swarm_port_info, hws, &host);
-	if (rc)
-		goto err;
-
-	dev_set_drvdata(dev, host);
-
-	return 0;
-err:
-	release_resource(&swarm_ide_resource);
-	iounmap(base);
-	return rc;
-}
-
-static struct device_driver swarm_ide_driver = {
-	.name	= swarm_ide_string,
-	.bus	= &platform_bus_type,
-	.probe	= swarm_ide_probe,
-};
-
-static void swarm_ide_platform_release(struct device *device)
-{
-	struct platform_device *pldev;
-
-	/* free device */
-	pldev = to_platform_device(device);
-	kfree(pldev);
-}
-
-static int __devinit swarm_ide_init_module(void)
-{
-	struct platform_device *pldev;
-	int err;
-
-	printk(KERN_INFO "SWARM IDE driver\n");
-
-	if (driver_register(&swarm_ide_driver)) {
-		printk(KERN_ERR "Driver registration failed\n");
-		err = -ENODEV;
-		goto out;
-	}
-
-        if (!(pldev = kzalloc(sizeof (*pldev), GFP_KERNEL))) {
-		err = -ENOMEM;
-		goto out_unregister_driver;
-	}
-
-	pldev->name		= swarm_ide_string;
-	pldev->id		= 0;
-	pldev->dev.release	= swarm_ide_platform_release;
-
-	if (platform_device_register(pldev)) {
-		err = -ENODEV;
-		goto out_free_pldev;
-	}
-
-        if (!pldev->dev.driver) {
-		/*
-		 * The driver was not bound to this device, there was
-                 * no hardware at this address. Unregister it, as the
-		 * release fuction will take care of freeing the
-		 * allocated structure
-		 */
-		platform_device_unregister (pldev);
-	}
-
-	swarm_ide_dev = pldev;
-
-	return 0;
-
-out_free_pldev:
-	kfree(pldev);
-
-out_unregister_driver:
-	driver_unregister(&swarm_ide_driver);
-out:
-	return err;
-}
-
-module_init(swarm_ide_init_module);

diff --git a/drivers/leds/leds-fsg.c b/drivers/leds/leds-fsg.c
index be0e121..3493515 100644
--- a/drivers/leds/leds-fsg.c
+++ b/drivers/leds/leds-fsg.c

@@ -161,6 +161,16 @@
 {
 	int ret;
 
+	/* Map the LED chip select address space */
+	latch_address = (unsigned short *) ioremap(IXP4XX_EXP_BUS_BASE(2), 512);
+	if (!latch_address) {
+		ret = -ENOMEM;
+		goto failremap;
+	}
+
+	latch_value = 0xffff;
+	*latch_address = latch_value;
+
 	ret = led_classdev_register(&pdev->dev, &fsg_wlan_led);
 	if (ret < 0)
 		goto failwlan;
@@ -185,20 +195,8 @@
 	if (ret < 0)
 		goto failring;
 
-	/* Map the LED chip select address space */
-	latch_address = (unsigned short *) ioremap(IXP4XX_EXP_BUS_BASE(2), 512);
-	if (!latch_address) {
-		ret = -ENOMEM;
-		goto failremap;
-	}
-
-	latch_value = 0xffff;
-	*latch_address = latch_value;
-
 	return ret;
 
- failremap:
-	led_classdev_unregister(&fsg_ring_led);
  failring:
 	led_classdev_unregister(&fsg_sync_led);
  failsync:
@@ -210,14 +208,14 @@
  failwan:
 	led_classdev_unregister(&fsg_wlan_led);
  failwlan:
+	iounmap(latch_address);
+ failremap:
 
 	return ret;
 }
 
 static int fsg_led_remove(struct platform_device *pdev)
 {
-	iounmap(latch_address);
-
 	led_classdev_unregister(&fsg_wlan_led);
 	led_classdev_unregister(&fsg_wan_led);
 	led_classdev_unregister(&fsg_sata_led);
@@ -225,6 +223,8 @@
 	led_classdev_unregister(&fsg_sync_led);
 	led_classdev_unregister(&fsg_ring_led);
 
+	iounmap(latch_address);
+
 	return 0;
 }
 

diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index 146c069..f508729 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c

@@ -248,11 +248,10 @@
 					const struct i2c_device_id *id)
 {
 	struct pca955x_led *pca955x;
-	int i;
-	int err = -ENODEV;
 	struct pca955x_chipdef *chip;
 	struct i2c_adapter *adapter;
 	struct led_platform_data *pdata;
+	int i, err;
 
 	chip = &pca955x_chipdefs[id->driver_data];
 	adapter = to_i2c_adapter(client->dev.parent);
@@ -282,43 +281,41 @@
 		}
 	}
 
-	for (i = 0; i < chip->bits; i++) {
-		pca955x = kzalloc(sizeof(struct pca955x_led), GFP_KERNEL);
-		if (!pca955x) {
-			err = -ENOMEM;
-			goto exit;
-		}
+	pca955x = kzalloc(sizeof(*pca955x) * chip->bits, GFP_KERNEL);
+	if (!pca955x)
+		return -ENOMEM;
 
-		pca955x->chipdef = chip;
-		pca955x->client = client;
-		pca955x->led_num = i;
+	i2c_set_clientdata(client, pca955x);
+
+	for (i = 0; i < chip->bits; i++) {
+		pca955x[i].chipdef = chip;
+		pca955x[i].client = client;
+		pca955x[i].led_num = i;
+
 		/* Platform data can specify LED names and default triggers */
 		if (pdata) {
 			if (pdata->leds[i].name)
-				snprintf(pca955x->name, 32, "pca955x:%s",
-							pdata->leds[i].name);
+				snprintf(pca955x[i].name,
+					 sizeof(pca955x[i].name), "pca955x:%s",
+					 pdata->leds[i].name);
 			if (pdata->leds[i].default_trigger)
-				pca955x->led_cdev.default_trigger =
+				pca955x[i].led_cdev.default_trigger =
 					pdata->leds[i].default_trigger;
 		} else {
-			snprintf(pca955x->name, 32, "pca955x:%d", i);
+			snprintf(pca955x[i].name, sizeof(pca955x[i].name),
+				 "pca955x:%d", i);
 		}
-		spin_lock_init(&pca955x->lock);
 
-		pca955x->led_cdev.name = pca955x->name;
-		pca955x->led_cdev.brightness_set =
-				pca955x_led_set;
+		spin_lock_init(&pca955x[i].lock);
 
-		/*
-		 * Client data is a pointer to the _first_ pca955x_led
-		 * struct
-		 */
-		if (i == 0)
-			i2c_set_clientdata(client, pca955x);
+		pca955x[i].led_cdev.name = pca955x[i].name;
+		pca955x[i].led_cdev.brightness_set = pca955x_led_set;
 
-		INIT_WORK(&(pca955x->work), pca955x_led_work);
+		INIT_WORK(&pca955x[i].work, pca955x_led_work);
 
-		led_classdev_register(&client->dev, &(pca955x->led_cdev));
+		err = led_classdev_register(&client->dev, &pca955x[i].led_cdev);
+		if (err < 0)
+			goto exit;
 	}
 
 	/* Turn off LEDs */
@@ -336,23 +333,32 @@
 	pca955x_write_psc(client, 1, 0);
 
 	return 0;
+
 exit:
+	while (i--) {
+		led_classdev_unregister(&pca955x[i].led_cdev);
+		cancel_work_sync(&pca955x[i].work);
+	}
+
+	kfree(pca955x);
+	i2c_set_clientdata(client, NULL);
+
 	return err;
 }
 
 static int __devexit pca955x_remove(struct i2c_client *client)
 {
 	struct pca955x_led *pca955x = i2c_get_clientdata(client);
-	int leds = pca955x->chipdef->bits;
 	int i;
 
-	for (i = 0; i < leds; i++) {
-		led_classdev_unregister(&(pca955x->led_cdev));
-		cancel_work_sync(&(pca955x->work));
-		kfree(pca955x);
-		pca955x = pca955x + 1;
+	for (i = 0; i < pca955x->chipdef->bits; i++) {
+		led_classdev_unregister(&pca955x[i].led_cdev);
+		cancel_work_sync(&pca955x[i].work);
 	}
 
+	kfree(pca955x);
+	i2c_set_clientdata(client, NULL);
+
 	return 0;
 }
 

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index b262c00..5b91915 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c

@@ -426,7 +426,7 @@
 				old_nl->next = (uint32_t) ((void *) nl -
 							   (void *) old_nl);
 			disk = dm_disk(hc->md);
-			nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
+			nl->dev = huge_encode_dev(disk_devt(disk));
 			nl->next = 0;
 			strcpy(nl->name, hc->name);
 
@@ -539,7 +539,7 @@
 	if (dm_suspended(md))
 		param->flags |= DM_SUSPEND_FLAG;
 
-	param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor));
+	param->dev = huge_encode_dev(disk_devt(disk));
 
 	/*
 	 * Yes, this will be out of date by the time it gets back
@@ -548,7 +548,7 @@
 	 */
 	param->open_count = dm_open_count(md);
 
-	if (disk->policy)
+	if (get_disk_ro(disk))
 		param->flags |= DM_READONLY_FLAG;
 
 	param->event_nr = dm_get_event_nr(md);

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c2fcf28..3d38481 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c

@@ -33,6 +33,7 @@
 	unsigned fail_count;		/* Cumulative failure count */
 
 	struct dm_path path;
+	struct work_struct deactivate_path;
 };
 
 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -112,6 +113,7 @@
 static void process_queued_ios(struct work_struct *work);
 static void trigger_event(struct work_struct *work);
 static void activate_path(struct work_struct *work);
+static void deactivate_path(struct work_struct *work);
 
 
 /*-----------------------------------------------
@@ -122,8 +124,10 @@
 {
 	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
 
-	if (pgpath)
+	if (pgpath) {
 		pgpath->path.is_active = 1;
+		INIT_WORK(&pgpath->deactivate_path, deactivate_path);
+	}
 
 	return pgpath;
 }
@@ -133,6 +137,14 @@
 	kfree(pgpath);
 }
 
+static void deactivate_path(struct work_struct *work)
+{
+	struct pgpath *pgpath =
+		container_of(work, struct pgpath, deactivate_path);
+
+	blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
+}
+
 static struct priority_group *alloc_priority_group(void)
 {
 	struct priority_group *pg;
@@ -870,6 +882,7 @@
 		      pgpath->path.dev->name, m->nr_valid_paths);
 
 	queue_work(kmultipathd, &m->trigger_event);
+	queue_work(kmultipathd, &pgpath->deactivate_path);
 
 out:
 	spin_unlock_irqrestore(&m->lock, flags);

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4de90ab..b745d8a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c

@@ -284,8 +284,8 @@
 
 	memset(major_minor, 0, sizeof(major_minor));
 	sprintf(major_minor, "%d:%d",
-		bio->bi_bdev->bd_disk->major,
-		bio->bi_bdev->bd_disk->first_minor);
+		MAJOR(disk_devt(bio->bi_bdev->bd_disk)),
+		MINOR(disk_devt(bio->bi_bdev->bd_disk)));
 
 	/*
 	 * Test to see which stripe drive triggered the event

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ace998c..327de03 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c

@@ -377,13 +377,14 @@
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
+	int cpu;
 
 	io->start_time = jiffies;
 
-	preempt_disable();
-	disk_round_stats(dm_disk(md));
-	preempt_enable();
-	dm_disk(md)->in_flight = atomic_inc_return(&md->pending);
+	cpu = part_stat_lock();
+	part_round_stats(cpu, &dm_disk(md)->part0);
+	part_stat_unlock();
+	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
 }
 
 static int end_io_acct(struct dm_io *io)
@@ -391,15 +392,16 @@
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->bio;
 	unsigned long duration = jiffies - io->start_time;
-	int pending;
+	int pending, cpu;
 	int rw = bio_data_dir(bio);
 
-	preempt_disable();
-	disk_round_stats(dm_disk(md));
-	preempt_enable();
-	dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);
+	cpu = part_stat_lock();
+	part_round_stats(cpu, &dm_disk(md)->part0);
+	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
+	part_stat_unlock();
 
-	disk_stat_add(dm_disk(md), ticks[rw], duration);
+	dm_disk(md)->part0.in_flight = pending =
+		atomic_dec_return(&md->pending);
 
 	return !pending;
 }
@@ -885,6 +887,7 @@
 	int r = -EIO;
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
+	int cpu;
 
 	/*
 	 * There is no use in forwarding any barrier request since we can't
@@ -897,8 +900,10 @@
 
 	down_read(&md->io_lock);
 
-	disk_stat_inc(dm_disk(md), ios[rw]);
-	disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
+	part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
+	part_stat_unlock();
 
 	/*
 	 * If we're suspended we have to queue
@@ -1146,7 +1151,7 @@
 
 static void free_dev(struct mapped_device *md)
 {
-	int minor = md->disk->first_minor;
+	int minor = MINOR(disk_devt(md->disk));
 
 	if (md->suspended_bdev) {
 		unlock_fs(md);
@@ -1182,7 +1187,7 @@
 	list_splice_init(&md->uevent_list, &uevents);
 	spin_unlock_irqrestore(&md->uevent_lock, flags);
 
-	dm_send_uevents(&uevents, &md->disk->dev.kobj);
+	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
 
 	atomic_inc(&md->event_nr);
 	wake_up(&md->eventq);
@@ -1267,7 +1272,7 @@
 
 	md = idr_find(&_minor_idr, minor);
 	if (md && (md == MINOR_ALLOCED ||
-		   (dm_disk(md)->first_minor != minor) ||
+		   (MINOR(disk_devt(dm_disk(md))) != minor) ||
 		   test_bit(DMF_FREEING, &md->flags))) {
 		md = NULL;
 		goto out;
@@ -1318,7 +1323,8 @@
 
 	if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
 		map = dm_get_table(md);
-		idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor);
+		idr_replace(&_minor_idr, MINOR_ALLOCED,
+			    MINOR(disk_devt(dm_disk(md))));
 		set_bit(DMF_FREEING, &md->flags);
 		spin_unlock(&_minor_lock);
 		if (!dm_suspended(md)) {
@@ -1638,7 +1644,7 @@
  *---------------------------------------------------------------*/
 void dm_kobject_uevent(struct mapped_device *md)
 {
-	kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE);
+	kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
 }
 
 uint32_t dm_next_uevent_seq(struct mapped_device *md)

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index b1eebf8..b9cbee68 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c

@@ -318,14 +318,18 @@
 	mddev_t *mddev = q->queuedata;
 	dev_info_t *tmp_dev;
 	sector_t block;
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	tmp_dev = which_dev(mddev, bio->bi_sector);
 	block = bio->bi_sector >> 1;
@@ -349,7 +353,7 @@
 		 * split it.
 		 */
 		struct bio_pair *bp;
-		bp = bio_split(bio, bio_split_pool,
+		bp = bio_split(bio,
 			       ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector);
 		if (linear_make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);

diff --git a/drivers/md/md.c b/drivers/md/md.c
index deeac4b..0a3a4bd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c

@@ -1464,10 +1464,7 @@
 	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
 		goto fail;
 
-	if (rdev->bdev->bd_part)
-		ko = &rdev->bdev->bd_part->dev.kobj;
-	else
-		ko = &rdev->bdev->bd_disk->dev.kobj;
+	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
 	if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
 		kobject_del(&rdev->kobj);
 		goto fail;
@@ -3470,8 +3467,8 @@
 	disk->queue = mddev->queue;
 	add_disk(disk);
 	mddev->gendisk = disk;
-	error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj,
-				     "%s", "md");
+	error = kobject_init_and_add(&mddev->kobj, &md_ktype,
+				     &disk_to_dev(disk)->kobj, "%s", "md");
 	mutex_unlock(&disks_mutex);
 	if (error)
 		printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
@@ -3761,7 +3758,7 @@
 	sysfs_notify(&mddev->kobj, NULL, "array_state");
 	sysfs_notify(&mddev->kobj, NULL, "sync_action");
 	sysfs_notify(&mddev->kobj, NULL, "degraded");
-	kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE);
+	kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
 	return 0;
 }
 
@@ -5549,8 +5546,8 @@
 	rcu_read_lock();
 	rdev_for_each_rcu(rdev, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
-		curr_events = disk_stat_read(disk, sectors[0]) + 
-				disk_stat_read(disk, sectors[1]) - 
+		curr_events = part_stat_read(&disk->part0, sectors[0]) +
+				part_stat_read(&disk->part0, sectors[1]) -
 				atomic_read(&disk->sync_io);
 		/* sync IO will cause sync_io to increase before the disk_stats
 		 * as sync_io is counted when a request starts, and

diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index c4779cc..8bb8794 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c

@@ -147,6 +147,7 @@
 	struct multipath_bh * mp_bh;
 	struct multipath_info *multipath;
 	const int rw = bio_data_dir(bio);
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
@@ -158,8 +159,11 @@
 	mp_bh->master_bio = bio;
 	mp_bh->mddev = mddev;
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	mp_bh->path = multipath_map(conf);
 	if (mp_bh->path < 0) {

diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 1836106..53508a8 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c

@@ -399,14 +399,18 @@
 	sector_t chunk;
 	sector_t block, rsect;
 	const int rw = bio_data_dir(bio);
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	chunk_size = mddev->chunk_size >> 10;
 	chunk_sects = mddev->chunk_size >> 9;
@@ -423,7 +427,7 @@
 		/* This is a one page bio that upper layers
 		 * refuse to split for us, so we need to split it.
 		 */
-		bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
+		bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)));
 		if (raid0_make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);
 		if (raid0_make_request(q, &bp->bio2))

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 03a5ab7..b976442 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c

@@ -779,7 +779,7 @@
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
 	const int do_sync = bio_sync(bio);
-	int do_barriers;
+	int cpu, do_barriers;
 	mdk_rdev_t *blocked_rdev;
 
 	/*
@@ -804,8 +804,11 @@
 
 	bitmap = mddev->bitmap;
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	/*
 	 * make_request() can abort the operation when READA is being
@@ -1302,9 +1305,6 @@
 					sbio->bi_size = r1_bio->sectors << 9;
 					sbio->bi_idx = 0;
 					sbio->bi_phys_segments = 0;
-					sbio->bi_hw_segments = 0;
-					sbio->bi_hw_front_size = 0;
-					sbio->bi_hw_back_size = 0;
 					sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
 					sbio->bi_flags |= 1 << BIO_UPTODATE;
 					sbio->bi_next = NULL;
@@ -1790,7 +1790,6 @@
 		bio->bi_vcnt = 0;
 		bio->bi_idx = 0;
 		bio->bi_phys_segments = 0;
-		bio->bi_hw_segments = 0;
 		bio->bi_size = 0;
 		bio->bi_end_io = NULL;
 		bio->bi_private = NULL;

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e34cd0e..8bdc9bf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c

@@ -789,6 +789,7 @@
 	mirror_info_t *mirror;
 	r10bio_t *r10_bio;
 	struct bio *read_bio;
+	int cpu;
 	int i;
 	int chunk_sects = conf->chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
@@ -816,7 +817,7 @@
 		/* This is a one page bio that upper layers
 		 * refuse to split for us, so we need to split it.
 		 */
-		bp = bio_split(bio, bio_split_pool,
+		bp = bio_split(bio,
 			       chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
 		if (make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);
@@ -843,8 +844,11 @@
 	 */
 	wait_barrier(conf);
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bio));
+	part_stat_unlock();
 
 	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
@@ -1345,9 +1349,6 @@
 		tbio->bi_size = r10_bio->sectors << 9;
 		tbio->bi_idx = 0;
 		tbio->bi_phys_segments = 0;
-		tbio->bi_hw_segments = 0;
-		tbio->bi_hw_front_size = 0;
-		tbio->bi_hw_back_size = 0;
 		tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
 		tbio->bi_flags |= 1 << BIO_UPTODATE;
 		tbio->bi_next = NULL;
@@ -1947,7 +1948,6 @@
 		bio->bi_vcnt = 0;
 		bio->bi_idx = 0;
 		bio->bi_phys_segments = 0;
-		bio->bi_hw_segments = 0;
 		bio->bi_size = 0;
 	}
 

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 224de02..ae16794 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c

@@ -101,6 +101,40 @@
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
 #endif
 
+/*
+ * We maintain a biased count of active stripes in the bottom 16 bits of
+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
+ */
+static inline int raid5_bi_phys_segments(struct bio *bio)
+{
+	return bio->bi_phys_segments & 0xffff;
+}
+
+static inline int raid5_bi_hw_segments(struct bio *bio)
+{
+	return (bio->bi_phys_segments >> 16) & 0xffff;
+}
+
+static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+{
+	--bio->bi_phys_segments;
+	return raid5_bi_phys_segments(bio);
+}
+
+static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+{
+	unsigned short val = raid5_bi_hw_segments(bio);
+
+	--val;
+	bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
+	return val;
+}
+
+static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+{	/* store cnt (processed stripes) in the high 16 bits, keep the low 16 */
+	bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
+}
+
 static inline int raid6_next_disk(int disk, int raid_disks)
 {
 	disk++;
@@ -507,7 +541,7 @@
 			while (rbi && rbi->bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				rbi2 = r5_next_bio(rbi, dev->sector);
-				if (--rbi->bi_phys_segments == 0) {
+				if (!raid5_dec_bi_phys_segments(rbi)) {
 					rbi->bi_next = return_bi;
 					return_bi = rbi;
 				}
@@ -1725,7 +1759,7 @@
 	if (*bip)
 		bi->bi_next = *bip;
 	*bip = bi;
-	bi->bi_phys_segments ++;
+	bi->bi_phys_segments++;
 	spin_unlock_irq(&conf->device_lock);
 	spin_unlock(&sh->lock);
 
@@ -1819,7 +1853,7 @@
 			sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
-			if (--bi->bi_phys_segments == 0) {
+			if (!raid5_dec_bi_phys_segments(bi)) {
 				md_write_end(conf->mddev);
 				bi->bi_next = *return_bi;
 				*return_bi = bi;
@@ -1834,7 +1868,7 @@
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
-			if (--bi->bi_phys_segments == 0) {
+			if (!raid5_dec_bi_phys_segments(bi)) {
 				md_write_end(conf->mddev);
 				bi->bi_next = *return_bi;
 				*return_bi = bi;
@@ -1858,7 +1892,7 @@
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
 				clear_bit(BIO_UPTODATE, &bi->bi_flags);
-				if (--bi->bi_phys_segments == 0) {
+				if (!raid5_dec_bi_phys_segments(bi)) {
 					bi->bi_next = *return_bi;
 					*return_bi = bi;
 				}
@@ -2033,7 +2067,7 @@
 				while (wbi && wbi->bi_sector <
 					dev->sector + STRIPE_SECTORS) {
 					wbi2 = r5_next_bio(wbi, dev->sector);
-					if (--wbi->bi_phys_segments == 0) {
+					if (!raid5_dec_bi_phys_segments(wbi)) {
 						md_write_end(conf->mddev);
 						wbi->bi_next = *return_bi;
 						*return_bi = wbi;
@@ -2814,7 +2848,7 @@
 				copy_data(0, rbi, dev->page, dev->sector);
 				rbi2 = r5_next_bio(rbi, dev->sector);
 				spin_lock_irq(&conf->device_lock);
-				if (--rbi->bi_phys_segments == 0) {
+				if (!raid5_dec_bi_phys_segments(rbi)) {
 					rbi->bi_next = return_bi;
 					return_bi = rbi;
 				}
@@ -3155,8 +3189,11 @@
 	if(bi) {
 		conf->retry_read_aligned_list = bi->bi_next;
 		bi->bi_next = NULL;
+		/*
+		 * this sets the active stripe count to 1 and the processed
+		 * stripe count to zero (upper 16 bits)
+		 */
 		bi->bi_phys_segments = 1; /* biased count of active stripes */
-		bi->bi_hw_segments = 0; /* count of processed stripes */
 	}
 
 	return bi;
@@ -3206,8 +3243,7 @@
 	if ((bi->bi_size>>9) > q->max_sectors)
 		return 0;
 	blk_recount_segments(q, bi);
-	if (bi->bi_phys_segments > q->max_phys_segments ||
-	    bi->bi_hw_segments > q->max_hw_segments)
+	if (bi->bi_phys_segments > q->max_phys_segments)
 		return 0;
 
 	if (q->merge_bvec_fn)
@@ -3351,7 +3387,7 @@
 	sector_t logical_sector, last_sector;
 	struct stripe_head *sh;
 	const int rw = bio_data_dir(bi);
-	int remaining;
+	int cpu, remaining;
 
 	if (unlikely(bio_barrier(bi))) {
 		bio_endio(bi, -EOPNOTSUPP);
@@ -3360,8 +3396,11 @@
 
 	md_write_start(mddev, bi);
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
+	cpu = part_stat_lock();
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+		      bio_sectors(bi));
+	part_stat_unlock();
 
 	if (rw == READ &&
 	     mddev->reshape_position == MaxSector &&
@@ -3468,7 +3507,7 @@
 			
 	}
 	spin_lock_irq(&conf->device_lock);
-	remaining = --bi->bi_phys_segments;
+	remaining = raid5_dec_bi_phys_segments(bi);
 	spin_unlock_irq(&conf->device_lock);
 	if (remaining == 0) {
 
@@ -3752,7 +3791,7 @@
 		     sector += STRIPE_SECTORS,
 		     scnt++) {
 
-		if (scnt < raid_bio->bi_hw_segments)
+		if (scnt < raid5_bi_hw_segments(raid_bio))
 			/* already done this stripe */
 			continue;
 
@@ -3760,7 +3799,7 @@
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
-			raid_bio->bi_hw_segments = scnt;
+			raid5_set_bi_hw_segments(raid_bio, scnt);
 			conf->retry_read_aligned = raid_bio;
 			return handled;
 		}
@@ -3768,7 +3807,7 @@
 		set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
 		if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
 			release_stripe(sh);
-			raid_bio->bi_hw_segments = scnt;
+			raid5_set_bi_hw_segments(raid_bio, scnt);
 			conf->retry_read_aligned = raid_bio;
 			return handled;
 		}
@@ -3778,7 +3817,7 @@
 		handled++;
 	}
 	spin_lock_irq(&conf->device_lock);
-	remaining = --raid_bio->bi_phys_segments;
+	remaining = raid5_dec_bi_phys_segments(raid_bio);
 	spin_unlock_irq(&conf->device_lock);
 	if (remaining == 0)
 		bio_endio(raid_bio, 0);

diff --git a/drivers/media/common/tuners/tuner-xc2028.h b/drivers/media/common/tuners/tuner-xc2028.h
index 216025c..2c5b628 100644
--- a/drivers/media/common/tuners/tuner-xc2028.h
+++ b/drivers/media/common/tuners/tuner-xc2028.h

@@ -10,6 +10,7 @@
 #include "dvb_frontend.h"
 
 #define XC2028_DEFAULT_FIRMWARE "xc3028-v27.fw"
+#define XC3028L_DEFAULT_FIRMWARE "xc3028L-v36.fw"
 
 /*      Dmoduler		IF (kHz) */
 #define	XC3028_FE_DEFAULT	0		/* Don't load SCODE */

diff --git a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c
index 4eed783..a127a41 100644
--- a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c
+++ b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c

@@ -491,6 +491,7 @@
 	.demod_address = 0x53,
 	.invert = 1,
 	.repeated_start_workaround = 1,
+	.serial_mpeg = 1,
 };
 
 static struct itd1000_config skystar2_rev2_7_itd1000_config = {

diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c
index 069d847..0c733c6 100644
--- a/drivers/media/dvb/dvb-core/dmxdev.c
+++ b/drivers/media/dvb/dvb-core/dmxdev.c

@@ -364,15 +364,16 @@
 				       enum dmx_success success)
 {
 	struct dmxdev_filter *dmxdevfilter = filter->priv;
+	unsigned long flags;
 	int ret;
 
 	if (dmxdevfilter->buffer.error) {
 		wake_up(&dmxdevfilter->buffer.queue);
 		return 0;
 	}
-	spin_lock(&dmxdevfilter->dev->lock);
+	spin_lock_irqsave(&dmxdevfilter->dev->lock, flags);
 	if (dmxdevfilter->state != DMXDEV_STATE_GO) {
-		spin_unlock(&dmxdevfilter->dev->lock);
+		spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 		return 0;
 	}
 	del_timer(&dmxdevfilter->timer);
@@ -391,7 +392,7 @@
 	}
 	if (dmxdevfilter->params.sec.flags & DMX_ONESHOT)
 		dmxdevfilter->state = DMXDEV_STATE_DONE;
-	spin_unlock(&dmxdevfilter->dev->lock);
+	spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 	wake_up(&dmxdevfilter->buffer.queue);
 	return 0;
 }
@@ -403,11 +404,12 @@
 {
 	struct dmxdev_filter *dmxdevfilter = feed->priv;
 	struct dvb_ringbuffer *buffer;
+	unsigned long flags;
 	int ret;
 
-	spin_lock(&dmxdevfilter->dev->lock);
+	spin_lock_irqsave(&dmxdevfilter->dev->lock, flags);
 	if (dmxdevfilter->params.pes.output == DMX_OUT_DECODER) {
-		spin_unlock(&dmxdevfilter->dev->lock);
+		spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 		return 0;
 	}
 
@@ -417,7 +419,7 @@
 	else
 		buffer = &dmxdevfilter->dev->dvr_buffer;
 	if (buffer->error) {
-		spin_unlock(&dmxdevfilter->dev->lock);
+		spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 		wake_up(&buffer->queue);
 		return 0;
 	}
@@ -428,7 +430,7 @@
 		dvb_ringbuffer_flush(buffer);
 		buffer->error = ret;
 	}
-	spin_unlock(&dmxdevfilter->dev->lock);
+	spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
 	wake_up(&buffer->queue);
 	return 0;
 }

diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c
index e2eca0b..a2c1fd5 100644
--- a/drivers/media/dvb/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb/dvb-core/dvb_demux.c

@@ -399,7 +399,9 @@
 void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf,
 			      size_t count)
 {
-	spin_lock(&demux->lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&demux->lock, flags);
 
 	while (count--) {
 		if (buf[0] == 0x47)
@@ -407,16 +409,17 @@
 		buf += 188;
 	}
 
-	spin_unlock(&demux->lock);
+	spin_unlock_irqrestore(&demux->lock, flags);
 }
 
 EXPORT_SYMBOL(dvb_dmx_swfilter_packets);
 
 void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count)
 {
+	unsigned long flags;
 	int p = 0, i, j;
 
-	spin_lock(&demux->lock);
+	spin_lock_irqsave(&demux->lock, flags);
 
 	if (demux->tsbufp) {
 		i = demux->tsbufp;
@@ -449,17 +452,18 @@
 	}
 
 bailout:
-	spin_unlock(&demux->lock);
+	spin_unlock_irqrestore(&demux->lock, flags);
 }
 
 EXPORT_SYMBOL(dvb_dmx_swfilter);
 
 void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count)
 {
+	unsigned long flags;
 	int p = 0, i, j;
 	u8 tmppack[188];
 
-	spin_lock(&demux->lock);
+	spin_lock_irqsave(&demux->lock, flags);
 
 	if (demux->tsbufp) {
 		i = demux->tsbufp;
@@ -500,7 +504,7 @@
 	}
 
 bailout:
-	spin_unlock(&demux->lock);
+	spin_unlock_irqrestore(&demux->lock, flags);
 }
 
 EXPORT_SYMBOL(dvb_dmx_swfilter_204);

diff --git a/drivers/media/dvb/frontends/s5h1420.c b/drivers/media/dvb/frontends/s5h1420.c
index 747d3fa..2e9fd28 100644
--- a/drivers/media/dvb/frontends/s5h1420.c
+++ b/drivers/media/dvb/frontends/s5h1420.c

@@ -59,7 +59,7 @@
 	 * it does not support repeated-start, workaround: write addr-1
 	 * and then read
 	 */
-	u8 shadow[255];
+	u8 shadow[256];
 };
 
 static u32 s5h1420_getsymbolrate(struct s5h1420_state* state);
@@ -94,8 +94,11 @@
 		if (ret != 3)
 			return ret;
 	} else {
-		ret = i2c_transfer(state->i2c, &msg[1], 2);
-		if (ret != 2)
+		ret = i2c_transfer(state->i2c, &msg[1], 1);
+		if (ret != 1)
+			return ret;
+		ret = i2c_transfer(state->i2c, &msg[2], 1);
+		if (ret != 1)
 			return ret;
 	}
 
@@ -823,7 +826,7 @@
 	struct s5h1420_state* state = fe->demodulator_priv;
 
 	/* disable power down and do reset */
-	state->CON_1_val = 0x10;
+	state->CON_1_val = state->config->serial_mpeg << 4;
 	s5h1420_writereg(state, 0x02, state->CON_1_val);
 	msleep(10);
 	s5h1420_reset(state);

diff --git a/drivers/media/dvb/frontends/s5h1420.h b/drivers/media/dvb/frontends/s5h1420.h
index 4c913f1..ff30813 100644
--- a/drivers/media/dvb/frontends/s5h1420.h
+++ b/drivers/media/dvb/frontends/s5h1420.h

@@ -32,10 +32,12 @@
 	u8 demod_address;
 
 	/* does the inversion require inversion? */
-	u8 invert : 1;
+	u8 invert:1;
 
-	u8 repeated_start_workaround : 1;
-	u8 cdclk_polarity : 1; /* 1 == falling edge, 0 == raising edge */
+	u8 repeated_start_workaround:1;
+	u8 cdclk_polarity:1; /* 1 == falling edge, 0 == raising edge */
+
+	u8 serial_mpeg:1;
 };
 
 #if defined(CONFIG_DVB_S5H1420) || (defined(CONFIG_DVB_S5H1420_MODULE) && defined(MODULE))

diff --git a/drivers/media/dvb/siano/sms-cards.c b/drivers/media/dvb/siano/sms-cards.c
index cc5efb6..9da260f 100644
--- a/drivers/media/dvb/siano/sms-cards.c
+++ b/drivers/media/dvb/siano/sms-cards.c

@@ -40,6 +40,8 @@
 		.driver_info = SMS1XXX_BOARD_HAUPPAUGE_OKEMO_B },
 	{ USB_DEVICE(0x2040, 0x5500),
 		.driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
+	{ USB_DEVICE(0x2040, 0x5510),
+		.driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
 	{ USB_DEVICE(0x2040, 0x5580),
 		.driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM },
 	{ USB_DEVICE(0x2040, 0x5590),
@@ -87,7 +89,7 @@
 		.fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-nova-b-dvbt-01.fw",
 	},
 	[SMS1XXX_BOARD_HAUPPAUGE_WINDHAM] = {
-		.name	= "Hauppauge WinTV-Nova-T-MiniStick",
+		.name	= "Hauppauge WinTV MiniStick",
 		.type	= SMS_NOVA_B0,
 		.fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-hcw-55xxx-dvbt-01.fw",
 	},

diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index 6ae4cc8..933eaef 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c

@@ -3431,7 +3431,7 @@
 	dprintk("bttv: open minor=%d\n",minor);
 
 	for (i = 0; i < bttv_num; i++) {
-		if (bttvs[i].radio_dev->minor == minor) {
+		if (bttvs[i].radio_dev && bttvs[i].radio_dev->minor == minor) {
 			btv = &bttvs[i];
 			break;
 		}

diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c
index c149b7d..5405c30 100644
--- a/drivers/media/video/cafe_ccic.c
+++ b/drivers/media/video/cafe_ccic.c

@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>

diff --git a/drivers/media/video/cpia2/cpia2_usb.c b/drivers/media/video/cpia2/cpia2_usb.c
index a4574740..a8a1990 100644
--- a/drivers/media/video/cpia2/cpia2_usb.c
+++ b/drivers/media/video/cpia2/cpia2_usb.c

@@ -632,7 +632,7 @@
 static int submit_urbs(struct camera_data *cam)
 {
 	struct urb *urb;
-	int fx, err, i;
+	int fx, err, i, j;
 
 	for(i=0; i<NUM_SBUF; ++i) {
 		if (cam->sbuf[i].data)
@@ -657,6 +657,9 @@
 		}
 		urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL);
 		if (!urb) {
+			ERR("%s: usb_alloc_urb error!\n", __func__);
+			for (j = 0; j < i; j++)
+				usb_free_urb(cam->sbuf[j].urb);
 			return -ENOMEM;
 		}
 

diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c
index 8fe5f38..3cb9734 100644
--- a/drivers/media/video/cx18/cx18-cards.c
+++ b/drivers/media/video/cx18/cx18-cards.c

@@ -163,7 +163,7 @@
 	},
 	.audio_inputs = {
 		{ CX18_CARD_INPUT_AUD_TUNER,
-		  CX18_AV_AUDIO8, 0 },
+		  CX18_AV_AUDIO5, 0 },
 		{ CX18_CARD_INPUT_LINE_IN1,
 		  CX18_AV_AUDIO_SERIAL1, 0 },
 	},

diff --git a/drivers/media/video/em28xx/em28xx-audio.c b/drivers/media/video/em28xx/em28xx-audio.c
index 3c00610..ac3292d 100644
--- a/drivers/media/video/em28xx/em28xx-audio.c
+++ b/drivers/media/video/em28xx/em28xx-audio.c

@@ -117,10 +117,10 @@
 
 			if (oldptr + length >= runtime->buffer_size) {
 				unsigned int cnt =
-				    runtime->buffer_size - oldptr - 1;
+				    runtime->buffer_size - oldptr;
 				memcpy(runtime->dma_area + oldptr * stride, cp,
 				       cnt * stride);
-				memcpy(runtime->dma_area, cp + cnt,
+				memcpy(runtime->dma_area, cp + cnt * stride,
 				       length * stride - cnt * stride);
 			} else {
 				memcpy(runtime->dma_area + oldptr * stride, cp,
@@ -161,8 +161,14 @@
 
 		memset(dev->adev->transfer_buffer[i], 0x80, sb_size);
 		urb = usb_alloc_urb(EM28XX_NUM_AUDIO_PACKETS, GFP_ATOMIC);
-		if (!urb)
+		if (!urb) {
+			em28xx_errdev("usb_alloc_urb failed!\n");
+			for (j = 0; j < i; j++) {
+				usb_free_urb(dev->adev->urb[j]);
+				kfree(dev->adev->transfer_buffer[j]);
+			}
 			return -ENOMEM;
+		}
 
 		urb->dev = dev->udev;
 		urb->context = dev;

diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c
index 452da70..de943cf 100644
--- a/drivers/media/video/em28xx/em28xx-cards.c
+++ b/drivers/media/video/em28xx/em28xx-cards.c

@@ -93,28 +93,6 @@
 			.amux     = 0,
 		} },
 	},
-	[EM2800_BOARD_KWORLD_USB2800] = {
-		.name         = "Kworld USB2800",
-		.valid        = EM28XX_BOARD_NOT_VALIDATED,
-		.is_em2800    = 1,
-		.vchannels    = 3,
-		.tuner_type   = TUNER_PHILIPS_FCV1236D,
-		.tda9887_conf = TDA9887_PRESENT,
-		.decoder      = EM28XX_SAA7113,
-		.input          = { {
-			.type     = EM28XX_VMUX_TELEVISION,
-			.vmux     = SAA7115_COMPOSITE2,
-			.amux     = 0,
-		}, {
-			.type     = EM28XX_VMUX_COMPOSITE1,
-			.vmux     = SAA7115_COMPOSITE0,
-			.amux     = 1,
-		}, {
-			.type     = EM28XX_VMUX_SVIDEO,
-			.vmux     = SAA7115_SVIDEO3,
-			.amux     = 1,
-		} },
-	},
 	[EM2820_BOARD_KWORLD_PVRTV2800RF] = {
 		.name         = "Kworld PVR TV 2800 RF",
 		.is_em2800    = 0,
@@ -599,7 +577,7 @@
 		}, {
 			.type     = EM28XX_VMUX_COMPOSITE1,
 			.vmux     = TVP5150_COMPOSITE1,
-			.amux     = 1,
+			.amux     = 3,
 		}, {
 			.type     = EM28XX_VMUX_SVIDEO,
 			.vmux     = TVP5150_SVIDEO,
@@ -952,22 +930,23 @@
 	},
 	[EM2880_BOARD_KWORLD_DVB_310U] = {
 		.name	      = "KWorld DVB-T 310U",
-		.valid        = EM28XX_BOARD_NOT_VALIDATED,
 		.vchannels    = 3,
 		.tuner_type   = TUNER_XC2028,
+		.has_dvb      = 1,
+		.mts_firmware = 1,
 		.decoder      = EM28XX_TVP5150,
 		.input          = { {
 			.type     = EM28XX_VMUX_TELEVISION,
 			.vmux     = TVP5150_COMPOSITE0,
-			.amux     = 0,
+			.amux     = EM28XX_AMUX_VIDEO,
 		}, {
 			.type     = EM28XX_VMUX_COMPOSITE1,
 			.vmux     = TVP5150_COMPOSITE1,
-			.amux     = 1,
-		}, {
+			.amux     = EM28XX_AMUX_AC97_LINE_IN,
+		}, {	/* S-video has not been tested yet */
 			.type     = EM28XX_VMUX_SVIDEO,
 			.vmux     = TVP5150_SVIDEO,
-			.amux     = 1,
+			.amux     = EM28XX_AMUX_AC97_LINE_IN,
 		} },
 	},
 	[EM2881_BOARD_DNT_DA2_HYBRID] = {
@@ -1282,6 +1261,7 @@
 static struct em28xx_hash_table em28xx_eeprom_hash [] = {
 	/* P/N: SA 60002070465 Tuner: TVF7533-MF */
 	{0x6ce05a8f, EM2820_BOARD_PROLINK_PLAYTV_USB2, TUNER_YMEC_TVF_5533MF},
+	{0x966a0441, EM2880_BOARD_KWORLD_DVB_310U, TUNER_XC2028},
 };
 
 /* I2C devicelist hash table for devices with generic USB IDs */
@@ -1552,9 +1532,12 @@
 		/* djh - Not sure which demod we need here */
 		ctl->demod = XC3028_FE_DEFAULT;
 		break;
+	case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600:
+		ctl->demod = XC3028_FE_DEFAULT;
+		ctl->fname = XC3028L_DEFAULT_FIRMWARE;
+		break;
 	case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950:
 	case EM2880_BOARD_PINNACLE_PCTV_HD_PRO:
-	case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600:
 		/* FIXME: Better to specify the needed IF */
 		ctl->demod = XC3028_FE_DEFAULT;
 		break;
@@ -1764,6 +1747,20 @@
 		break;
 	case EM2820_BOARD_UNKNOWN:
 	case EM2800_BOARD_UNKNOWN:
+		/*
+		 * The K-WORLD DVB-T 310U is detected as an MSI Digivox AD.
+		 *
+		 * This occurs because they share identical USB vendor and
+		 * product IDs.
+		 *
+		 * What we do here is look up the EEPROM hash of the K-WORLD
+		 * and if it is found then we decide that we do not have
+		 * a DIGIVOX and reset the device to the K-WORLD instead.
+		 *
+		 * This solution is only valid if they do not share eeprom
+		 * hash identities which has not been determined as yet.
+		 */
+	case EM2880_BOARD_MSI_DIGIVOX_AD:
 		if (!em28xx_hint_board(dev))
 			em28xx_set_model(dev);
 		break;

diff --git a/drivers/media/video/em28xx/em28xx-dvb.c b/drivers/media/video/em28xx/em28xx-dvb.c
index 4b992bc..d2b1a1a 100644
--- a/drivers/media/video/em28xx/em28xx-dvb.c
+++ b/drivers/media/video/em28xx/em28xx-dvb.c

@@ -452,6 +452,15 @@
 			goto out_free;
 		}
 		break;
+	case EM2880_BOARD_KWORLD_DVB_310U:
+		dvb->frontend = dvb_attach(zl10353_attach,
+						&em28xx_zl10353_with_xc3028,
+						&dev->i2c_adap);
+		if (attach_xc3028(0x61, dev) < 0) {
+			result = -EINVAL;
+			goto out_free;
+		}
+		break;
 	default:
 		printk(KERN_ERR "%s/2: The frontend of your DVB/ATSC card"
 				" isn't supported yet\n",

diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c
index 7be6928..ac95c55 100644
--- a/drivers/media/video/gspca/gspca.c
+++ b/drivers/media/video/gspca/gspca.c

@@ -459,6 +459,7 @@
 		urb = usb_alloc_urb(npkt, GFP_KERNEL);
 		if (!urb) {
 			err("usb_alloc_urb failed");
+			destroy_urbs(gspca_dev);
 			return -ENOMEM;
 		}
 		urb->transfer_buffer = usb_buffer_alloc(gspca_dev->dev,
@@ -468,8 +469,8 @@
 
 		if (urb->transfer_buffer == NULL) {
 			usb_free_urb(urb);
-			destroy_urbs(gspca_dev);
 			err("usb_buffer_urb failed");
+			destroy_urbs(gspca_dev);
 			return -ENOMEM;
 		}
 		gspca_dev->urb[n] = urb;

diff --git a/drivers/media/video/gspca/pac7311.c b/drivers/media/video/gspca/pac7311.c
index d4be518..ba865b7 100644
--- a/drivers/media/video/gspca/pac7311.c
+++ b/drivers/media/video/gspca/pac7311.c

@@ -1063,6 +1063,7 @@
 	{USB_DEVICE(0x093a, 0x2621), .driver_info = SENSOR_PAC7302},
 	{USB_DEVICE(0x093a, 0x2624), .driver_info = SENSOR_PAC7302},
 	{USB_DEVICE(0x093a, 0x2626), .driver_info = SENSOR_PAC7302},
+	{USB_DEVICE(0x093a, 0x262a), .driver_info = SENSOR_PAC7302},
 	{}
 };
 MODULE_DEVICE_TABLE(usb, device_table);

diff --git a/drivers/media/video/gspca/sonixb.c b/drivers/media/video/gspca/sonixb.c
index 5dd78c6..12b81ae 100644
--- a/drivers/media/video/gspca/sonixb.c
+++ b/drivers/media/video/gspca/sonixb.c

@@ -232,7 +232,7 @@
 static struct v4l2_pix_format vga_mode[] = {
 	{160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
 		.bytesperline = 160,
-		.sizeimage = 160 * 120 * 5 / 4,
+		.sizeimage = 160 * 120,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 2 | MODE_RAW},
 	{160, 120, V4L2_PIX_FMT_SN9C10X, V4L2_FIELD_NONE,
@@ -264,7 +264,7 @@
 		.priv = 1 | MODE_REDUCED_SIF},
 	{176, 144, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE,
 		.bytesperline = 176,
-		.sizeimage = 176 * 144 * 5 / 4,
+		.sizeimage = 176 * 144,
 		.colorspace = V4L2_COLORSPACE_SRGB,
 		.priv = 1 | MODE_RAW},
 	{176, 144, V4L2_PIX_FMT_SN9C10X, V4L2_FIELD_NONE,

diff --git a/drivers/media/video/gspca/sonixj.c b/drivers/media/video/gspca/sonixj.c
index d75b1d2..572b0f3 100644
--- a/drivers/media/video/gspca/sonixj.c
+++ b/drivers/media/video/gspca/sonixj.c

@@ -707,6 +707,7 @@
 			0x08, 0,		/* value, index */
 			gspca_dev->usb_buf, 8,
 			500);
+	msleep(2);
 }
 
 /* read 5 bytes in gspca_dev->usb_buf */
@@ -976,13 +977,13 @@
 	case BRIDGE_SN9C105:
 		if (regF1 != 0x11)
 			return -ENODEV;
-		reg_w(gspca_dev, 0x02, regGpio, 2);
+		reg_w(gspca_dev, 0x01, regGpio, 2);
 		break;
 	case BRIDGE_SN9C120:
 		if (regF1 != 0x12)
 			return -ENODEV;
 		regGpio[1] = 0x70;
-		reg_w(gspca_dev, 0x02, regGpio, 2);
+		reg_w(gspca_dev, 0x01, regGpio, 2);
 		break;
 	default:
 /*	case BRIDGE_SN9C110: */
@@ -1183,7 +1184,7 @@
 	static const __u8 CA[] = { 0x28, 0xd8, 0x14, 0xec };
 	static const __u8 CE[] = { 0x32, 0xdd, 0x2d, 0xdd };	/* MI0360 */
 	static const __u8 CE_ov76xx[] =
-			{ 0x32, 0xdd, 0x32, 0xdd };	/* OV7630/48 */
+				{ 0x32, 0xdd, 0x32, 0xdd };
 
 	sn9c1xx = sn_tb[(int) sd->sensor];
 	configure_gpio(gspca_dev, sn9c1xx);
@@ -1223,8 +1224,15 @@
 	reg_w(gspca_dev, 0x20, gamma_def, sizeof gamma_def);
 	for (i = 0; i < 8; i++)
 		reg_w(gspca_dev, 0x84, reg84, sizeof reg84);
+	switch (sd->sensor) {
+	case SENSOR_OV7660:
+		reg_w1(gspca_dev, 0x9a, 0x05);
+		break;
+	default:
 		reg_w1(gspca_dev, 0x9a, 0x08);
 		reg_w1(gspca_dev, 0x99, 0x59);
+		break;
+	}
 
 	mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv;
 	if (mode)
@@ -1275,8 +1283,8 @@
 /*			reg1 = 0x44; */
 /*			reg1 = 0x46;	(done) */
 		} else {
-			reg17 = 0x22;	/* 640 MCKSIZE */
-			reg1 = 0x06;
+			reg17 = 0xa2;	/* 640 */
+			reg1 = 0x44;
 		}
 		break;
 	}
@@ -1285,6 +1293,7 @@
 	switch (sd->sensor) {
 	case SENSOR_OV7630:
 	case SENSOR_OV7648:
+	case SENSOR_OV7660:
 		reg_w(gspca_dev, 0xce, CE_ov76xx, 4);
 		break;
 	default:

diff --git a/drivers/media/video/gspca/spca561.c b/drivers/media/video/gspca/spca561.c
index cfbc9eb..95fcfcb 100644
--- a/drivers/media/video/gspca/spca561.c
+++ b/drivers/media/video/gspca/spca561.c

@@ -225,7 +225,7 @@
 	reg_w_val(gspca_dev->dev, 0x8802, (mode | 0x01));
 	do {
 		reg_r(gspca_dev, 0x8803, 1);
-		if (!gspca_dev->usb_buf)
+		if (!gspca_dev->usb_buf[0])
 			break;
 	} while (--retry);
 	if (retry == 0)

diff --git a/drivers/media/video/gspca/zc3xx.c b/drivers/media/video/gspca/zc3xx.c
index 8d7c27e..d61ef72 100644
--- a/drivers/media/video/gspca/zc3xx.c
+++ b/drivers/media/video/gspca/zc3xx.c

@@ -6576,8 +6576,8 @@
 		 cs2102_60HZ, cs2102_60HZScale},
 /* SENSOR_CS2102K 1 */
 		{cs2102_NoFliker, cs2102_NoFlikerScale,
-		 cs2102_50HZ, cs2102_50HZScale,
-		 cs2102_60HZ, cs2102_60HZScale},
+		 NULL, NULL, /* currently disabled */
+		 NULL, NULL},
 /* SENSOR_GC0305 2 */
 		{gc0305_NoFliker, gc0305_NoFliker,
 		 gc0305_50HZ, gc0305_50HZ,

diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c
index 3d3c48d..c685240 100644
--- a/drivers/media/video/ov511.c
+++ b/drivers/media/video/ov511.c

@@ -3591,7 +3591,7 @@
 ov51x_init_isoc(struct usb_ov511 *ov)
 {
 	struct urb *urb;
-	int fx, err, n, size;
+	int fx, err, n, i, size;
 
 	PDEBUG(3, "*** Initializing capture ***");
 
@@ -3662,6 +3662,8 @@
 		urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL);
 		if (!urb) {
 			err("init isoc: usb_alloc_urb ret. NULL");
+			for (i = 0; i < n; i++)
+				usb_free_urb(ov->sbuf[i].urb);
 			return -ENOMEM;
 		}
 		ov->sbuf[n].urb = urb;
@@ -5651,7 +5653,7 @@
 	if (!ov->dev)
 		return -ENODEV;
 	sensor_get_exposure(ov, &exp);
-	return sprintf(buf, "%d\n", exp >> 8);
+	return sprintf(buf, "%d\n", exp);
 }
 static DEVICE_ATTR(exposure, S_IRUGO, show_exposure, NULL);
 

diff --git a/drivers/media/video/pvrusb2/pvrusb2-devattr.c b/drivers/media/video/pvrusb2/pvrusb2-devattr.c
index 88e1751..cbe2a34 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-devattr.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-devattr.c

@@ -489,6 +489,8 @@
 struct usb_device_id pvr2_device_table[] = {
 	{ USB_DEVICE(0x2040, 0x2900),
 	  .driver_info = (kernel_ulong_t)&pvr2_device_29xxx},
+	{ USB_DEVICE(0x2040, 0x2950), /* Logically identical to 2900 */
+	  .driver_info = (kernel_ulong_t)&pvr2_device_29xxx},
 	{ USB_DEVICE(0x2040, 0x2400),
 	  .driver_info = (kernel_ulong_t)&pvr2_device_24xxx},
 	{ USB_DEVICE(0x1164, 0x0622),

diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index b1d09d8..92b83fe 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c

@@ -669,7 +669,7 @@
 		(unsigned long)vbuf, pos);
 	/* tell v4l buffer was filled */
 
-	buf->vb.field_count++;
+	buf->vb.field_count = dev->frame_count[chn] * 2;
 	do_gettimeofday(&ts);
 	buf->vb.ts = ts;
 	buf->vb.state = VIDEOBUF_DONE;
@@ -1268,6 +1268,7 @@
 	dev->last_frame[chn] = -1;
 	dev->bad_payload[chn] = 0;
 	dev->cur_frame[chn] = 0;
+	dev->frame_count[chn] = 0;
 	for (j = 0; j < SYS_FRAMES; j++) {
 		dev->buffer[chn].frame[j].ulState = 0;
 		dev->buffer[chn].frame[j].cur_size = 0;

diff --git a/drivers/media/video/uvc/uvc_ctrl.c b/drivers/media/video/uvc/uvc_ctrl.c
index 6ef3e52..feab12a 100644
--- a/drivers/media/video/uvc/uvc_ctrl.c
+++ b/drivers/media/video/uvc/uvc_ctrl.c

@@ -592,7 +592,7 @@
 	if (ctrl == NULL)
 		return -EINVAL;
 
-	data = kmalloc(8, GFP_KERNEL);
+	data = kmalloc(ctrl->info->size, GFP_KERNEL);
 	if (data == NULL)
 		return -ENOMEM;
 

diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c
index 168baab..11edf79 100644
--- a/drivers/media/video/w9968cf.c
+++ b/drivers/media/video/w9968cf.c

@@ -911,7 +911,6 @@
 
 	for (i = 0; i < W9968CF_URBS; i++) {
 		urb = usb_alloc_urb(W9968CF_ISO_PACKETS, GFP_KERNEL);
-		cam->urb[i] = urb;
 		if (!urb) {
 			for (j = 0; j < i; j++)
 				usb_free_urb(cam->urb[j]);
@@ -919,6 +918,7 @@
 			return -ENOMEM;
 		}
 
+		cam->urb[i] = urb;
 		urb->dev = udev;
 		urb->context = (void*)cam;
 		urb->pipe = usb_rcvisocpipe(udev, 1);

diff --git a/drivers/media/video/wm8739.c b/drivers/media/video/wm8739.c
index 95c79ad..54ac3fe 100644
--- a/drivers/media/video/wm8739.c
+++ b/drivers/media/video/wm8739.c

@@ -274,10 +274,8 @@
 			client->addr << 1, client->adapter->name);
 
 	state = kmalloc(sizeof(struct wm8739_state), GFP_KERNEL);
-	if (state == NULL) {
-		kfree(client);
+	if (state == NULL)
 		return -ENOMEM;
-	}
 	state->vol_l = 0x17; /* 0dB */
 	state->vol_r = 0x17; /* 0dB */
 	state->muted = 0;

diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c
index d842a7c..3282be7 100644
--- a/drivers/media/video/zoran_card.c
+++ b/drivers/media/video/zoran_card.c

@@ -988,7 +988,7 @@
 	zr->v4l_grab_seq = 0;
 	zr->v4l_settings.width = 192;
 	zr->v4l_settings.height = 144;
-	zr->v4l_settings.format = &zoran_formats[4];	/* YUY2 - YUV-4:2:2 packed */
+	zr->v4l_settings.format = &zoran_formats[7];	/* YUY2 - YUV-4:2:2 packed */
 	zr->v4l_settings.bytesperline =
 	    zr->v4l_settings.width *
 	    ((zr->v4l_settings.format->depth + 7) / 8);

diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c
index ec6f596..2dab9ee 100644
--- a/drivers/media/video/zoran_driver.c
+++ b/drivers/media/video/zoran_driver.c

@@ -134,7 +134,7 @@
 	}, {
 		.name = "16-bit RGB BE",
 		ZFMT(-1,
-		     V4L2_PIX_FMT_RGB565, V4L2_COLORSPACE_SRGB),
+		     V4L2_PIX_FMT_RGB565X, V4L2_COLORSPACE_SRGB),
 		.depth = 16,
 		.flags = ZORAN_FORMAT_CAPTURE |
 			 ZORAN_FORMAT_OVERLAY,
@@ -2737,7 +2737,8 @@
 				    fh->v4l_settings.format->fourcc;
 				fmt->fmt.pix.colorspace =
 				    fh->v4l_settings.format->colorspace;
-				fmt->fmt.pix.bytesperline = 0;
+				fmt->fmt.pix.bytesperline =
+				    fh->v4l_settings.bytesperline;
 				if (BUZ_MAX_HEIGHT <
 				    (fh->v4l_settings.height * 2))
 					fmt->fmt.pix.field =
@@ -2833,13 +2834,6 @@
 				fmt->fmt.pix.pixelformat,
 				(char *) &printformat);
 
-			if (fmt->fmt.pix.bytesperline > 0) {
-				dprintk(5,
-					KERN_ERR "%s: bpl not supported\n",
-					ZR_DEVNAME(zr));
-				return -EINVAL;
-			}
-
 			/* we can be requested to do JPEG/raw playback/capture */
 			if (!
 			    (fmt->type == V4L2_BUF_TYPE_VIDEO_CAPTURE ||
@@ -2923,6 +2917,7 @@
 				fh->jpg_buffers.buffer_size =
 				    zoran_v4l2_calc_bufsize(&fh->
 							    jpg_settings);
+				fmt->fmt.pix.bytesperline = 0;
 				fmt->fmt.pix.sizeimage =
 				    fh->jpg_buffers.buffer_size;
 
@@ -2979,6 +2974,8 @@
 
 				/* tell the user the
 				 * results/missing stuff */
+				fmt->fmt.pix.bytesperline =
+					fh->v4l_settings.bytesperline;
 				fmt->fmt.pix.sizeimage =
 					fh->v4l_settings.height *
 					fh->v4l_settings.bytesperline;

diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index d2d2318..6e291bf 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c

@@ -197,7 +197,7 @@
 static int mspro_block_disk_release(struct gendisk *disk)
 {
 	struct mspro_block_data *msb = disk->private_data;
-	int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT;
+	int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT;
 
 	mutex_lock(&mspro_block_disk_lock);
 
@@ -828,7 +828,7 @@
 
 	if (msb->eject) {
 		while ((req = elv_next_request(q)) != NULL)
-			end_queued_request(req, -ENODEV);
+			__blk_end_request(req, -ENODEV, blk_rq_bytes(req));
 
 		return;
 	}

diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c
index facdb98..1ee8501 100644
--- a/drivers/misc/eeepc-laptop.c
+++ b/drivers/misc/eeepc-laptop.c

@@ -450,12 +450,14 @@
 	int value = 0;
 
 	read_acpi_int(NULL, EEEPC_EC_FAN_PWM, &value);
+	value = value * 255 / 100;
 	return (value);
 }
 
 static void eeepc_set_fan_pwm(int value)
 {
-	value = SENSORS_LIMIT(value, 0, 100);
+	value = SENSORS_LIMIT(value, 0, 255);
+	value = value * 100 / 255;
 	ec_write(EEEPC_EC_SC02, value);
 }
 
@@ -520,15 +522,23 @@
 	static SENSOR_DEVICE_ATTR(_name, _mode, show_##_name, store_##_name, 0);
 
 EEEPC_CREATE_SENSOR_ATTR(fan1_input, S_IRUGO, eeepc_get_fan_rpm, NULL);
-EEEPC_CREATE_SENSOR_ATTR(fan1_pwm, S_IRUGO | S_IWUSR,
+EEEPC_CREATE_SENSOR_ATTR(pwm1, S_IRUGO | S_IWUSR,
 			 eeepc_get_fan_pwm, eeepc_set_fan_pwm);
 EEEPC_CREATE_SENSOR_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
 			 eeepc_get_fan_ctrl, eeepc_set_fan_ctrl);
 
+static ssize_t
+show_name(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "eeepc\n");
+}
+static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, 0);
+
 static struct attribute *hwmon_attributes[] = {
-	&sensor_dev_attr_fan1_pwm.dev_attr.attr,
+	&sensor_dev_attr_pwm1.dev_attr.attr,
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
+	&sensor_dev_attr_name.dev_attr.attr,
 	NULL
 };
 

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index a9b6146..efacee0 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c

@@ -84,7 +84,7 @@
 	mutex_lock(&open_lock);
 	md->usage--;
 	if (md->usage == 0) {
-		int devidx = md->disk->first_minor >> MMC_SHIFT;
+		int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT;
 		__clear_bit(devidx, dev_use);
 
 		put_disk(md->disk);

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 917035e..0000896 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c

@@ -426,8 +426,6 @@
 	host->sg = NULL;
 	host->data = data;
 
-	mci_writel(host, BLKR, MCI_BCNT(data->blocks)
-			| MCI_BLKLEN(data->blksz));
 	dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n",
 			MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz));
 
@@ -483,6 +481,10 @@
 		if (data->blocks > 1 && data->blksz & 3)
 			goto fail;
 		atmci_set_timeout(host, data);
+
+		/* Must set block count/size before sending command */
+		mci_writel(host, BLKR, MCI_BCNT(data->blocks)
+				| MCI_BLKLEN(data->blksz));
 	}
 
 	iflags = MCI_CMDRDY;

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index f34f20c..9bf581c 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c

@@ -1005,6 +1005,29 @@
 	return ftl_write((void *)dev, buf, block, 1);
 }
 
+/* Discard nr_sects sectors from sector: unmap each, set its BAM entry to 0. */
+static int ftl_discardsect(struct mtd_blktrans_dev *dev,
+			   unsigned long sector, unsigned nr_sects)
+{
+	partition_t *part = (void *)dev;
+	uint32_t bsize = 1 << part->header.EraseUnitSize;
+
+	DEBUG(1, "FTL erase sector %lu for %u sectors\n", sector, nr_sects);
+
+	while (nr_sects) {
+		uint32_t old_addr = part->VirtualBlockMap[sector];
+		if (old_addr != 0xffffffff) {	/* 0xffffffff: already unmapped */
+			part->VirtualBlockMap[sector] = 0xffffffff;
+			part->EUNInfo[old_addr/bsize].Deleted++;
+			if (set_bam_entry(part, old_addr, 0))
+				return -EIO;	/* stop at the first failed update */
+		}
+		nr_sects--;
+		sector++;
+	}
+
+	return 0;
+}
 /*====================================================================*/
 
 static void ftl_freepart(partition_t *part)
@@ -1069,6 +1092,7 @@
 	.blksize 	= SECTOR_SIZE,
 	.readsect	= ftl_readsect,
 	.writesect	= ftl_writesect,
+	.discard	= ftl_discardsect,
 	.getgeo		= ftl_getgeo,
 	.add_mtd	= ftl_add_mtd,
 	.remove_dev	= ftl_remove_dev,

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 9ff007c..681d5ac 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c

@@ -32,6 +32,14 @@
 	spinlock_t queue_lock;
 };
 
+static int blktrans_discard_request(struct request_queue *q,
+				    struct request *req)
+{
+	req->cmd_type = REQ_TYPE_LINUX_BLOCK;	/* with cmd[0], tested in do_blktrans_request() */
+	req->cmd[0] = REQ_LB_OP_DISCARD;	/* routes the request to tr->discard() there */
+	return 0;				/* q is not used here */
+}
+
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
@@ -44,6 +52,10 @@
 
 	buf = req->buffer;
 
+	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
+	    req->cmd[0] == REQ_LB_OP_DISCARD)
+		return !tr->discard(dev, block, nsect);
+
 	if (!blk_fs_request(req))
 		return 0;
 
@@ -367,6 +379,10 @@
 
 	tr->blkcore_priv->rq->queuedata = tr;
 	blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize);
+	if (tr->discard)
+		blk_queue_set_discard(tr->blkcore_priv->rq,
+				      blktrans_discard_request);
+
 	tr->blkshift = ffs(tr->blksize) - 1;
 
 	tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr,

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 0b6095b..bcd2bc4 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c

@@ -396,7 +396,7 @@
 	u32 extcnf_ctrl;
 	u32 timeout = PHY_CFG_TIMEOUT;
 
-	WARN_ON(preempt_count());
+	might_sleep();
 
 	if (!mutex_trylock(&nvm_mutex)) {
 		WARN(1, KERN_ERR "e1000e mutex contention. Owned by pid %d\n",

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 9c71858..77baff0 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c

@@ -16,6 +16,7 @@
 
 
 #include <linux/kernel.h>
+#include <linux/sched.h>
 #include <linux/pci.h>
 #include <linux/stat.h>
 #include <linux/topology.h>
@@ -484,6 +485,21 @@
 #endif /* HAVE_PCI_LEGACY */
 
 #ifdef HAVE_PCI_MMAP
+
+/* Does vma fit inside BAR resno? Page units; BAR length rounded up to pages. */
+static int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma)
+{
+	unsigned long nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long start = vma->vm_pgoff;
+	unsigned long size = (pci_resource_len(pdev, resno) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	if (start < size && size - start >= nr)
+		return 1;
+	WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n",
+		current->comm, start, start+nr, pci_name(pdev), resno, size);
+	return 0;
+}
+
 /**
  * pci_mmap_resource - map a PCI resource into user memory space
  * @kobj: kobject for mapping
@@ -510,6 +526,9 @@
 	if (i >= PCI_ROM_RESOURCE)
 		return -ENODEV;
 
+	if (!pci_mmap_fits(pdev, i, vma))
+		return -EINVAL;
+
 	/* pci_mmap_page_range() expects the same kind of entry as coming
 	 * from /proc/bus/pci/ which is a "user visible" value. If this is
 	 * different from the resource itself, arch will do necessary fixup.

diff --git a/drivers/pnp/Makefile b/drivers/pnp/Makefile
index 26f5abc..e83f34f 100644
--- a/drivers/pnp/Makefile
+++ b/drivers/pnp/Makefile

@@ -2,12 +2,15 @@
 # Makefile for the Linux Plug-and-Play Support.
 #
 
-obj-y		:= core.o card.o driver.o resource.o manager.o support.o interface.o quirks.o system.o
+obj-y		:= core.o card.o driver.o resource.o manager.o support.o interface.o quirks.o
 
 obj-$(CONFIG_PNPACPI)		+= pnpacpi/
 obj-$(CONFIG_PNPBIOS)		+= pnpbios/
 obj-$(CONFIG_ISAPNP)		+= isapnp/
 
+# pnp_system_init goes after pnpacpi/pnpbios init
+obj-y				+= system.o
+
 ifeq ($(CONFIG_PNP_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
 endif

diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index c1b9ea3..53561d7 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c

@@ -268,7 +268,7 @@
 	return 0;
 }
 
-subsys_initcall(pnpacpi_init);
+fs_initcall(pnpacpi_init);
 
 static int __init pnpacpi_setup(char *str)
 {

diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 19a4be1..662dfcdd 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c

@@ -571,7 +571,7 @@
 	return 0;
 }
 
-subsys_initcall(pnpbios_init);
+fs_initcall(pnpbios_init);
 
 static int __init pnpbios_thread_init(void)
 {

diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index f118252..52e2743 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c

@@ -422,6 +422,12 @@
 	return err;
 }
 
+static int rtc_dev_fasync(int fd, struct file *file, int on)
+{
+	struct rtc_device *rtc = file->private_data;
+	return fasync_helper(fd, file, on, &rtc->async_queue);
+}
+
 static int rtc_dev_release(struct inode *inode, struct file *file)
 {
 	struct rtc_device *rtc = file->private_data;
@@ -434,16 +440,13 @@
 	if (rtc->ops->release)
 		rtc->ops->release(rtc->dev.parent);
 
+	if (file->f_flags & FASYNC)
+		rtc_dev_fasync(-1, file, 0);
+
 	clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags);
 	return 0;
 }
 
-static int rtc_dev_fasync(int fd, struct file *file, int on)
-{
-	struct rtc_device *rtc = file->private_data;
-	return fasync_helper(fd, file, on, &rtc->async_queue);
-}
-
 static const struct file_operations rtc_dev_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,

diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index 03c0e40..e3b5c4d 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c

@@ -76,7 +76,8 @@
 	/* Print kdev. */
 	if (block->gdp)
 		seq_printf(m, " at (%3d:%6d)",
-			   block->gdp->major, block->gdp->first_minor);
+			   MAJOR(disk_devt(block->gdp)),
+			   MINOR(disk_devt(block->gdp)));
 	else
 		seq_printf(m, "  at (???:??????)");
 	/* Print device name. */

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 711b3004..9481e4a 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c

@@ -114,7 +114,7 @@
 		found = 0;
 		// test if minor available
 		list_for_each_entry(entry, &dcssblk_devices, lh)
-			if (minor == entry->gd->first_minor)
+			if (minor == MINOR(disk_devt(entry->gd)))
 				found++;
 		if (!found) break; // got unused minor
 	}
@@ -397,7 +397,7 @@
 		goto unload_seg;
 	}
 	sprintf(dev_info->gd->disk_name, "dcssblk%d",
-		dev_info->gd->first_minor);
+		MINOR(disk_devt(dev_info->gd)));
 	list_add_tail(&dev_info->lh, &dcssblk_devices);
 
 	if (!try_module_get(THIS_MODULE)) {

diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index 1679e2f..a0b6b46 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c

@@ -447,51 +447,36 @@
 {
 	char s[80];
 
-	sprintf(s, "%s sc:%x ", cdev->dev.bus_id, irq_ptr->schid.sch_no);
-
+	sprintf(s, "qdio: %s ", dev_name(&cdev->dev));
 	switch (irq_ptr->qib.qfmt) {
 	case QDIO_QETH_QFMT:
-		sprintf(s + strlen(s), "OSADE ");
+		sprintf(s + strlen(s), "OSA ");
 		break;
 	case QDIO_ZFCP_QFMT:
 		sprintf(s + strlen(s), "ZFCP ");
 		break;
 	case QDIO_IQDIO_QFMT:
-		sprintf(s + strlen(s), "HiperSockets ");
+		sprintf(s + strlen(s), "HS ");
 		break;
 	}
-	sprintf(s + strlen(s), "using: ");
-
-	if (!is_thinint_irq(irq_ptr))
-		sprintf(s + strlen(s), "no");
-	sprintf(s + strlen(s), "AdapterInterrupts ");
-	if (!(irq_ptr->sch_token != 0))
-		sprintf(s + strlen(s), "no");
-	sprintf(s + strlen(s), "QEBSM ");
-	if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED))
-		sprintf(s + strlen(s), "no");
-	sprintf(s + strlen(s), "OutboundPCI ");
-	if (!css_general_characteristics.aif_tdd)
-		sprintf(s + strlen(s), "no");
-	sprintf(s + strlen(s), "TDD\n");
-	printk(KERN_INFO "qdio: %s", s);
-
-	memset(s, 0, sizeof(s));
-	sprintf(s, "%s SIGA required: ", cdev->dev.bus_id);
-	if (irq_ptr->siga_flag.input)
-		sprintf(s + strlen(s), "Read ");
-	if (irq_ptr->siga_flag.output)
-		sprintf(s + strlen(s), "Write ");
-	if (irq_ptr->siga_flag.sync)
-		sprintf(s + strlen(s), "Sync ");
-	if (!irq_ptr->siga_flag.no_sync_ti)
-		sprintf(s + strlen(s), "SyncAI ");
-	if (!irq_ptr->siga_flag.no_sync_out_ti)
-		sprintf(s + strlen(s), "SyncOutAI ");
-	if (!irq_ptr->siga_flag.no_sync_out_pci)
-		sprintf(s + strlen(s), "SyncOutPCI");
+	sprintf(s + strlen(s), "on SC %x using ", irq_ptr->schid.sch_no);
+	sprintf(s + strlen(s), "AI:%d ", is_thinint_irq(irq_ptr));
+	sprintf(s + strlen(s), "QEBSM:%d ", (irq_ptr->sch_token) ? 1 : 0);
+	sprintf(s + strlen(s), "PCI:%d ",
+		(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) ? 1 : 0);
+	sprintf(s + strlen(s), "TDD:%d ", css_general_characteristics.aif_tdd);
+	sprintf(s + strlen(s), "SIGA:");
+	sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.input) ? "R" : " ");
+	sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.output) ? "W" : " ");
+	sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.sync) ? "S" : " ");
+	sprintf(s + strlen(s), "%s",
+		(!irq_ptr->siga_flag.no_sync_ti) ? "A" : " ");
+	sprintf(s + strlen(s), "%s",
+		(!irq_ptr->siga_flag.no_sync_out_ti) ? "O" : " ");
+	sprintf(s + strlen(s), "%s",
+		(!irq_ptr->siga_flag.no_sync_out_pci) ? "P" : " ");
 	sprintf(s + strlen(s), "\n");
-	printk(KERN_INFO "qdio: %s", s);
+	printk(KERN_INFO "%s", s);
 }
 
 int __init qdio_setup_init(void)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index aa4e77c..8abfd06 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c

@@ -1139,7 +1139,7 @@
 	srbcmd->id       = cpu_to_le32(scmd_id(cmd));
 	srbcmd->lun      = cpu_to_le32(cmd->device->lun);
 	srbcmd->flags    = cpu_to_le32(flag);
-	timeout = cmd->timeout_per_command/HZ;
+	timeout = cmd->request->timeout/HZ;
 	if (timeout == 0)
 		timeout = 1;
 	srbcmd->timeout  = cpu_to_le32(timeout);  // timeout in seconds

diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 822d521..c387c15 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c

@@ -464,7 +464,6 @@
 
     /* use request field to save the ptr. to completion struct. */
     scp->request = (struct request *)&wait;
-    scp->timeout_per_command = timeout*HZ;
     scp->cmd_len = 12;
     scp->cmnd = cmnd;
     cmndinfo.priority = IOCTL_PRI;
@@ -1995,23 +1994,12 @@
     register Scsi_Cmnd *pscp;
     register Scsi_Cmnd *nscp;
     ulong flags;
-    unchar b, t;
 
     TRACE(("gdth_putq() priority %d\n",priority));
     spin_lock_irqsave(&ha->smp_lock, flags);
 
-    if (!cmndinfo->internal_command) {
+    if (!cmndinfo->internal_command)
         cmndinfo->priority = priority;
-        b = scp->device->channel;
-        t = scp->device->id;
-        if (priority >= DEFAULT_PRI) {
-            if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha,b)].lock) ||
-                (b==ha->virt_bus && t<MAX_HDRIVES && ha->hdr[t].lock)) {
-                TRACE2(("gdth_putq(): locked IO ->update_timeout()\n"));
-                cmndinfo->timeout = gdth_update_timeout(scp, 0);
-            }
-        }
-    }
 
     if (ha->req_first==NULL) {
         ha->req_first = scp;                    /* queue was empty */
@@ -3899,6 +3887,39 @@
     return ((const char *)ha->binfo.type_string);
 }
 
+/* eh_timed_out hook: keep re-arming the timer for 5 expiries or locked IO. */
+static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp)
+{
+	gdth_ha_str *ha = shost_priv(scp->device->host);
+	struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
+	unchar b = scp->device->channel;
+	unchar t = scp->device->id;
+	ulong flags;
+	enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED;
+
+	TRACE(("%s() cmd 0x%x\n", __func__, scp->cmnd[0]));
+
+	/*
+	 * Each command is allowed six times its nominal timeout: the
+	 * counter is zeroed when the command is queued, and the timer is
+	 * simply re-armed for the first five expiries.
+	 */
+	if (++cmndinfo->timeout_count < 6)
+		retval = BLK_EH_RESET_TIMER;
+
+	/* Reset the timeout if it is locked IO */
+	spin_lock_irqsave(&ha->smp_lock, flags);
+	if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha, b)].lock) ||
+	    (b == ha->virt_bus && t < MAX_HDRIVES && ha->hdr[t].lock)) {
+		TRACE2(("%s(): locked IO, reset timeout\n", __func__));
+		retval = BLK_EH_RESET_TIMER;
+	}
+	spin_unlock_irqrestore(&ha->smp_lock, flags);
+
+	return retval;
+}
+
+
 static int gdth_eh_bus_reset(Scsi_Cmnd *scp)
 {
     gdth_ha_str *ha = shost_priv(scp->device->host);
@@ -3992,7 +4013,7 @@
     BUG_ON(!cmndinfo);
 
     scp->scsi_done = done;
-    gdth_update_timeout(scp, scp->timeout_per_command * 6);
+    cmndinfo->timeout_count = 0;
     cmndinfo->priority = DEFAULT_PRI;
 
     return __gdth_queuecommand(ha, scp, cmndinfo);
@@ -4096,12 +4117,10 @@
             ha->hdr[j].lock = 1;
             spin_unlock_irqrestore(&ha->smp_lock, flags);
             gdth_wait_completion(ha, ha->bus_cnt, j);
-            gdth_stop_timeout(ha, ha->bus_cnt, j);
         } else {
             spin_lock_irqsave(&ha->smp_lock, flags);
             ha->hdr[j].lock = 0;
             spin_unlock_irqrestore(&ha->smp_lock, flags);
-            gdth_start_timeout(ha, ha->bus_cnt, j);
             gdth_next(ha);
         }
     } 
@@ -4539,18 +4558,14 @@
                 spin_lock_irqsave(&ha->smp_lock, flags);
                 ha->raw[i].lock = 1;
                 spin_unlock_irqrestore(&ha->smp_lock, flags);
-                for (j = 0; j < ha->tid_cnt; ++j) {
+		for (j = 0; j < ha->tid_cnt; ++j)
                     gdth_wait_completion(ha, i, j);
-                    gdth_stop_timeout(ha, i, j);
-                }
             } else {
                 spin_lock_irqsave(&ha->smp_lock, flags);
                 ha->raw[i].lock = 0;
                 spin_unlock_irqrestore(&ha->smp_lock, flags);
-                for (j = 0; j < ha->tid_cnt; ++j) {
-                    gdth_start_timeout(ha, i, j);
+		for (j = 0; j < ha->tid_cnt; ++j)
                     gdth_next(ha);
-                }
             }
         } 
         break;
@@ -4644,6 +4659,7 @@
         .slave_configure        = gdth_slave_configure,
         .bios_param             = gdth_bios_param,
         .proc_info              = gdth_proc_info,
+	.eh_timed_out		= gdth_timed_out,
         .proc_name              = "gdth",
         .can_queue              = GDTH_MAXCMDS,
         .this_id                = -1,

diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h
index ca92476..1646444 100644
--- a/drivers/scsi/gdth.h
+++ b/drivers/scsi/gdth.h

@@ -916,7 +916,7 @@
         gdth_cmd_str *internal_cmd_str;         /* crier for internal messages*/
         dma_addr_t sense_paddr;                 /* sense dma-addr */
         unchar priority;
-        int timeout;
+	int timeout_count;			/* # of timeout calls */
         volatile int wait_for_completion;
         ushort status;
         ulong32 info;

diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c
index ce0228e..59349a3 100644
--- a/drivers/scsi/gdth_proc.c
+++ b/drivers/scsi/gdth_proc.c

@@ -748,69 +748,3 @@
     }
     spin_unlock_irqrestore(&ha->smp_lock, flags);
 }
-
-static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id)
-{
-    ulong flags;
-    Scsi_Cmnd *scp;
-    unchar b, t;
-
-    spin_lock_irqsave(&ha->smp_lock, flags);
-
-    for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) {
-        struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
-        if (!cmndinfo->internal_command) {
-            b = scp->device->channel;
-            t = scp->device->id;
-            if (t == (unchar)id && b == (unchar)busnum) {
-                TRACE2(("gdth_stop_timeout(): update_timeout()\n"));
-                cmndinfo->timeout = gdth_update_timeout(scp, 0);
-            }
-        }
-    }
-    spin_unlock_irqrestore(&ha->smp_lock, flags);
-}
-
-static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id)
-{
-    ulong flags;
-    Scsi_Cmnd *scp;
-    unchar b, t;
-
-    spin_lock_irqsave(&ha->smp_lock, flags);
-
-    for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) {
-        struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
-        if (!cmndinfo->internal_command) {
-            b = scp->device->channel;
-            t = scp->device->id;
-            if (t == (unchar)id && b == (unchar)busnum) {
-                TRACE2(("gdth_start_timeout(): update_timeout()\n"));
-                gdth_update_timeout(scp, cmndinfo->timeout);
-            }
-        }
-    }
-    spin_unlock_irqrestore(&ha->smp_lock, flags);
-}
-
-static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout)
-{
-    int oldto;
-
-    oldto = scp->timeout_per_command;
-    scp->timeout_per_command = timeout;
-
-    if (timeout == 0) {
-        del_timer(&scp->eh_timeout);
-        scp->eh_timeout.data = (unsigned long) NULL;
-        scp->eh_timeout.expires = 0;
-    } else {
-        if (scp->eh_timeout.data != (unsigned long) NULL) 
-            del_timer(&scp->eh_timeout);
-        scp->eh_timeout.data = (unsigned long) scp;
-        scp->eh_timeout.expires = jiffies + timeout;
-        add_timer(&scp->eh_timeout);
-    }
-
-    return oldto;
-}

diff --git a/drivers/scsi/gdth_proc.h b/drivers/scsi/gdth_proc.h
index 45e6fda..9b900cc 100644
--- a/drivers/scsi/gdth_proc.h
+++ b/drivers/scsi/gdth_proc.h

@@ -20,9 +20,6 @@
                               ulong64 *paddr);
 static void gdth_ioctl_free(gdth_ha_str *ha, int size, char *buf, ulong64 paddr);
 static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id);
-static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id);
-static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id);
-static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout);
 
 #endif
 

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 7b1502c..87e09f3 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c

@@ -756,7 +756,7 @@
 	init_event_struct(evt_struct,
 			  handle_cmd_rsp,
 			  VIOSRP_SRP_FORMAT,
-			  cmnd->timeout_per_command/HZ);
+			  cmnd->request->timeout/HZ);
 
 	evt_struct->cmnd = cmnd;
 	evt_struct->cmnd_done = done;

diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 461331d..81c16cb 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c

@@ -612,7 +612,7 @@
 	pc->req_xfer = pc->buf_size = scsi_bufflen(cmd);
 	pc->scsi_cmd = cmd;
 	pc->done = done;
-	pc->timeout = jiffies + cmd->timeout_per_command;
+	pc->timeout = jiffies + cmd->request->timeout;
 
 	if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
 		printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number);

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index e7a3a65..d30eb7b 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c

@@ -3670,7 +3670,8 @@
 			sdev->no_uld_attach = 1;
 		}
 		if (ipr_is_vset_device(res)) {
-			sdev->timeout = IPR_VSET_RW_TIMEOUT;
+			blk_queue_rq_timeout(sdev->request_queue,
+					     IPR_VSET_RW_TIMEOUT);
 			blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS);
 		}
 		if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res))

diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index bc9e6dd..ef683f0 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c

@@ -3818,7 +3818,7 @@
 		scb->cmd.dcdb.segment_4G = 0;
 		scb->cmd.dcdb.enhanced_sg = 0;
 
-		TimeOut = scb->scsi_cmd->timeout_per_command;
+		TimeOut = scb->scsi_cmd->request->timeout;
 
 		if (ha->subsys->param[4] & 0x00100000) {	/* If NEW Tape DCDB is Supported */
 			if (!scb->sg_len) {

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 0b7457d..da7b67d 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c

@@ -1476,12 +1476,12 @@
 		scsi_queue_work(conn->session->host, &conn->xmitwork);
 }
 
-static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
+static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
 {
 	struct iscsi_cls_session *cls_session;
 	struct iscsi_session *session;
 	struct iscsi_conn *conn;
-	enum scsi_eh_timer_return rc = EH_NOT_HANDLED;
+	enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
 
 	cls_session = starget_to_session(scsi_target(scmd->device));
 	session = cls_session->dd_data;
@@ -1494,14 +1494,14 @@
 		 * We are probably in the middle of iscsi recovery so let
 		 * that complete and handle the error.
 		 */
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 		goto done;
 	}
 
 	conn = session->leadconn;
 	if (!conn) {
 		/* In the middle of shuting down */
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 		goto done;
 	}
 
@@ -1513,20 +1513,21 @@
 	 */
 	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
 			    (conn->ping_timeout * HZ), jiffies))
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 	/*
 	 * if we are about to check the transport then give the command
 	 * more time
 	 */
 	if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ),
 			   jiffies))
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 	/* if in the middle of checking the transport then give us more time */
 	if (conn->ping_task)
-		rc = EH_RESET_TIMER;
+		rc = BLK_EH_RESET_TIMER;
 done:
 	spin_unlock(&session->lock);
-	debug_scsi("return %s\n", rc == EH_RESET_TIMER ? "timer reset" : "nh");
+	debug_scsi("return %s\n", rc == BLK_EH_RESET_TIMER ?
+					"timer reset" : "nh");
 	return rc;
 }
 

diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 48ee8c7..e155011 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c

@@ -294,10 +294,10 @@
 	}
 }
 
-static int sas_ata_scr_write(struct ata_port *ap, unsigned int sc_reg_in,
+static int sas_ata_scr_write(struct ata_link *link, unsigned int sc_reg_in,
 			      u32 val)
 {
-	struct domain_device *dev = ap->private_data;
+	struct domain_device *dev = link->ap->private_data;
 
 	SAS_DPRINTK("STUB %s\n", __func__);
 	switch (sc_reg_in) {
@@ -319,10 +319,10 @@
 	return 0;
 }
 
-static int sas_ata_scr_read(struct ata_port *ap, unsigned int sc_reg_in,
+static int sas_ata_scr_read(struct ata_link *link, unsigned int sc_reg_in,
 			    u32 *val)
 {
-	struct domain_device *dev = ap->private_data;
+	struct domain_device *dev = link->ap->private_data;
 
 	SAS_DPRINTK("STUB %s\n", __func__);
 	switch (sc_reg_in) {
@@ -398,7 +398,7 @@
 
 	/* Bounce SCSI-initiated commands to the SCSI EH */
 	if (qc->scsicmd) {
-		scsi_req_abort_cmd(qc->scsicmd);
+		blk_abort_request(qc->scsicmd->request);
 		scsi_schedule_eh(qc->scsicmd->device->host);
 		return;
 	}

diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index b4f9368..0001374 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h

@@ -55,7 +55,7 @@
 int  sas_register_ports(struct sas_ha_struct *sas_ha);
 void sas_unregister_ports(struct sas_ha_struct *sas_ha);
 
-enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *);
+enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *);
 
 int  sas_init_queue(struct sas_ha_struct *sas_ha);
 int  sas_init_events(struct sas_ha_struct *sas_ha);

diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index a8e3ef3..7448387 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c

@@ -673,43 +673,43 @@
 	return;
 }
 
-enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
+enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
 {
 	struct sas_task *task = TO_SAS_TASK(cmd);
 	unsigned long flags;
 
 	if (!task) {
-		cmd->timeout_per_command /= 2;
+		cmd->request->timeout /= 2;
 		SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n",
-			    cmd, task, (cmd->timeout_per_command ?
-			    "EH_RESET_TIMER" : "EH_NOT_HANDLED"));
-		if (!cmd->timeout_per_command)
-			return EH_NOT_HANDLED;
-		return EH_RESET_TIMER;
+			    cmd, task, (cmd->request->timeout ?
+			    "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED"));
+		if (!cmd->request->timeout)
+			return BLK_EH_NOT_HANDLED;
+		return BLK_EH_RESET_TIMER;
 	}
 
 	spin_lock_irqsave(&task->task_state_lock, flags);
 	BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED);
 	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
 		spin_unlock_irqrestore(&task->task_state_lock, flags);
-		SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n",
-			    cmd, task);
-		return EH_HANDLED;
+		SAS_DPRINTK("command 0x%p, task 0x%p, timed out: "
+			    "BLK_EH_HANDLED\n", cmd, task);
+		return BLK_EH_HANDLED;
 	}
 	if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) {
 		spin_unlock_irqrestore(&task->task_state_lock, flags);
 		SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: "
-			    "EH_RESET_TIMER\n",
+			    "BLK_EH_RESET_TIMER\n",
 			    cmd, task);
-		return EH_RESET_TIMER;
+		return BLK_EH_RESET_TIMER;
 	}
 	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
 	spin_unlock_irqrestore(&task->task_state_lock, flags);
 
-	SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n",
+	SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n",
 		    cmd, task);
 
-	return EH_NOT_HANDLED;
+	return BLK_EH_NOT_HANDLED;
 }
 
 int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
@@ -1039,7 +1039,7 @@
 		return;
 	}
 
-	scsi_req_abort_cmd(sc);
+	blk_abort_request(sc->request);
 	scsi_schedule_eh(sc->device->host);
 }
 

diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 97b7633..afe1de9 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c

@@ -1167,7 +1167,7 @@
  * cmd has not been completed within the timeout period.
  */
 static enum
-scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
+blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
 {
 	struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr;
 	struct megasas_instance *instance;
@@ -1175,7 +1175,7 @@
 
 	if (time_after(jiffies, scmd->jiffies_at_alloc +
 				(MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) {
-		return EH_NOT_HANDLED;
+		return BLK_EH_NOT_HANDLED;
 	}
 
 	instance = cmd->instance;
@@ -1189,7 +1189,7 @@
 
 		spin_unlock_irqrestore(instance->host->host_lock, flags);
 	}
-	return EH_RESET_TIMER;
+	return BLK_EH_RESET_TIMER;
 }
 
 /**

diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index c57c94c..3b7240e 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c

@@ -4170,8 +4170,8 @@
 	**
 	**----------------------------------------------------
 	*/
-	if (np->settle_time && cmd->timeout_per_command >= HZ) {
-		u_long tlimit = jiffies + cmd->timeout_per_command - HZ;
+	if (np->settle_time && cmd->request->timeout >= HZ) {
+		u_long tlimit = jiffies + cmd->request->timeout - HZ;
 		if (time_after(np->settle_time, tlimit))
 			np->settle_time = tlimit;
 	}

diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 37f9ba0..b6cd12b 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c

@@ -2845,7 +2845,7 @@
 	memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8));
 
 	/* Set ISP command timeout. */
-	pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ);
+	pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ);
 
 	/* Set device target ID and LUN */
 	pkt->lun = SCSI_LUN_32(cmd);
@@ -3114,7 +3114,7 @@
 	memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8));
 
 	/* Set ISP command timeout. */
-	pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ);
+	pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ);
 
 	/* Set device target ID and LUN */
 	pkt->lun = SCSI_LUN_32(cmd);

diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 88bebb1..de8279a 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c

@@ -1542,7 +1542,7 @@
 	DEBUG2(printk(KERN_INFO
 		      "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x,"
 		      "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no,
-		      cmd, jiffies, cmd->timeout_per_command / HZ,
+		      cmd, jiffies, cmd->request->timeout / HZ,
 		      ha->dpc_flags, cmd->result, cmd->allowed));
 
 	/* FIXME: wait for hba to go online */
@@ -1598,7 +1598,7 @@
 	DEBUG2(printk(KERN_INFO
 		      "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, "
 		      "to=%x,dpc_flags=%lx, status=%x allowed=%d\n",
-		      ha->host_no, cmd, jiffies, cmd->timeout_per_command / HZ,
+		      ha->host_no, cmd, jiffies, cmd->request->timeout / HZ,
 		      ha->dpc_flags, cmd->result, cmd->allowed));
 
 	stat = qla4xxx_reset_target(ha, ddb_entry);

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 762a879..2ac3cb2 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c

@@ -291,7 +291,6 @@
 		unsigned long flags;
 
 		cmd->device = dev;
-		init_timer(&cmd->eh_timeout);
 		INIT_LIST_HEAD(&cmd->list);
 		spin_lock_irqsave(&dev->list_lock, flags);
 		list_add_tail(&cmd->list, &dev->cmd_list);
@@ -652,14 +651,19 @@
 	unsigned long timeout;
 	int rtn = 0;
 
+	/*
+	 * We will use a queued command if possible, otherwise we will
+	 * emulate the queuing and calling of completion function ourselves.
+	 */
+	atomic_inc(&cmd->device->iorequest_cnt);
+
 	/* check if the device is still usable */
 	if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
 		/* in SDEV_DEL we error all commands. DID_NO_CONNECT
 		 * returns an immediate error upwards, and signals
 		 * that the device is no longer present */
 		cmd->result = DID_NO_CONNECT << 16;
-		atomic_inc(&cmd->device->iorequest_cnt);
-		__scsi_done(cmd);
+		scsi_done(cmd);
 		/* return 0 (because the command has been processed) */
 		goto out;
 	}
@@ -673,6 +677,7 @@
 		 * occur until the device transitions out of the
 		 * suspend state.
 		 */
+
 		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
 
 		SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));
@@ -715,21 +720,9 @@
 		host->resetting = 0;
 	}
 
-	/* 
-	 * AK: unlikely race here: for some reason the timer could
-	 * expire before the serial number is set up below.
-	 */
-	scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);
-
 	scsi_log_send(cmd);
 
 	/*
-	 * We will use a queued command if possible, otherwise we will
-	 * emulate the queuing and calling of completion function ourselves.
-	 */
-	atomic_inc(&cmd->device->iorequest_cnt);
-
-	/*
 	 * Before we queue this command, check if the command
 	 * length exceeds what the host adapter can handle.
 	 */
@@ -745,6 +738,12 @@
 	}
 
 	spin_lock_irqsave(host->host_lock, flags);
+	/*
+	 * AK: unlikely race here: for some reason the timer could
+	 * expire before the serial number is set up below.
+	 *
+	 * TODO: kill serial or move to blk layer
+	 */
 	scsi_cmd_get_serial(host, cmd); 
 
 	if (unlikely(host->shost_state == SHOST_DEL)) {
@@ -755,12 +754,8 @@
 	}
 	spin_unlock_irqrestore(host->host_lock, flags);
 	if (rtn) {
-		if (scsi_delete_timer(cmd)) {
-			atomic_inc(&cmd->device->iodone_cnt);
-			scsi_queue_insert(cmd,
-					  (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
-					  rtn : SCSI_MLQUEUE_HOST_BUSY);
-		}
+		scsi_queue_insert(cmd, (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
+						rtn : SCSI_MLQUEUE_HOST_BUSY);
 		SCSI_LOG_MLQUEUE(3,
 		    printk("queuecommand : request rejected\n"));
 	}
@@ -771,24 +766,6 @@
 }
 
 /**
- * scsi_req_abort_cmd -- Request command recovery for the specified command
- * @cmd: pointer to the SCSI command of interest
- *
- * This function requests that SCSI Core start recovery for the
- * command by deleting the timer and adding the command to the eh
- * queue.  It can be called by either LLDDs or SCSI Core.  LLDDs who
- * implement their own error recovery MAY ignore the timeout event if
- * they generated scsi_req_abort_cmd.
- */
-void scsi_req_abort_cmd(struct scsi_cmnd *cmd)
-{
-	if (!scsi_delete_timer(cmd))
-		return;
-	scsi_times_out(cmd);
-}
-EXPORT_SYMBOL(scsi_req_abort_cmd);
-
-/**
  * scsi_done - Enqueue the finished SCSI command into the done queue.
  * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
  * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
@@ -803,42 +780,7 @@
  */
 static void scsi_done(struct scsi_cmnd *cmd)
 {
-	/*
-	 * We don't have to worry about this one timing out anymore.
-	 * If we are unable to remove the timer, then the command
-	 * has already timed out.  In which case, we have no choice but to
-	 * let the timeout function run, as we have no idea where in fact
-	 * that function could really be.  It might be on another processor,
-	 * etc, etc.
-	 */
-	if (!scsi_delete_timer(cmd))
-		return;
-	__scsi_done(cmd);
-}
-
-/* Private entry to scsi_done() to complete a command when the timer
- * isn't running --- used by scsi_times_out */
-void __scsi_done(struct scsi_cmnd *cmd)
-{
-	struct request *rq = cmd->request;
-
-	/*
-	 * Set the serial numbers back to zero
-	 */
-	cmd->serial_number = 0;
-
-	atomic_inc(&cmd->device->iodone_cnt);
-	if (cmd->result)
-		atomic_inc(&cmd->device->ioerr_cnt);
-
-	BUG_ON(!rq);
-
-	/*
-	 * The uptodate/nbytes values don't matter, as we allow partial
-	 * completes and thus will check this in the softirq callback
-	 */
-	rq->completion_data = cmd;
-	blk_complete_request(rq);
+	blk_complete_request(cmd->request);
 }
 
 /* Move this to a header if it becomes more generally useful */

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 39ce3ab..fecefa0 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c

@@ -112,69 +112,8 @@
 }
 
 /**
- * scsi_add_timer - Start timeout timer for a single scsi command.
- * @scmd:	scsi command that is about to start running.
- * @timeout:	amount of time to allow this command to run.
- * @complete:	timeout function to call if timer isn't canceled.
- *
- * Notes:
- *    This should be turned into an inline function.  Each scsi command
- *    has its own timer, and as it is added to the queue, we set up the
- *    timer.  When the command completes, we cancel the timer.
- */
-void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
-		    void (*complete)(struct scsi_cmnd *))
-{
-
-	/*
-	 * If the clock was already running for this command, then
-	 * first delete the timer.  The timer handling code gets rather
-	 * confused if we don't do this.
-	 */
-	if (scmd->eh_timeout.function)
-		del_timer(&scmd->eh_timeout);
-
-	scmd->eh_timeout.data = (unsigned long)scmd;
-	scmd->eh_timeout.expires = jiffies + timeout;
-	scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
-
-	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
-					  " %d, (%p)\n", __func__,
-					  scmd, timeout, complete));
-
-	add_timer(&scmd->eh_timeout);
-}
-
-/**
- * scsi_delete_timer - Delete/cancel timer for a given function.
- * @scmd:	Cmd that we are canceling timer for
- *
- * Notes:
- *     This should be turned into an inline function.
- *
- * Return value:
- *     1 if we were able to detach the timer.  0 if we blew it, and the
- *     timer function has already started to run.
- */
-int scsi_delete_timer(struct scsi_cmnd *scmd)
-{
-	int rtn;
-
-	rtn = del_timer(&scmd->eh_timeout);
-
-	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
-					 " rtn: %d\n", __func__,
-					 scmd, rtn));
-
-	scmd->eh_timeout.data = (unsigned long)NULL;
-	scmd->eh_timeout.function = NULL;
-
-	return rtn;
-}
-
-/**
  * scsi_times_out - Timeout function for normal scsi commands.
- * @scmd:	Cmd that is timing out.
+ * @req:	request that is timing out.
  *
  * Notes:
  *     We do not need to lock this.  There is the potential for a race
@@ -182,9 +121,11 @@
  *     normal completion function determines that the timer has already
  *     fired, then it mustn't do anything.
  */
-void scsi_times_out(struct scsi_cmnd *scmd)
+enum blk_eh_timer_return scsi_times_out(struct request *req)
 {
-	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+	struct scsi_cmnd *scmd = req->special;
+	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
+	enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
 
 	scsi_log_completion(scmd, TIMEOUT_ERROR);
 
@@ -196,22 +137,20 @@
 		eh_timed_out = NULL;
 
 	if (eh_timed_out)
-		switch (eh_timed_out(scmd)) {
-		case EH_HANDLED:
-			__scsi_done(scmd);
-			return;
-		case EH_RESET_TIMER:
-			scsi_add_timer(scmd, scmd->timeout_per_command,
-				       scsi_times_out);
-			return;
-		case EH_NOT_HANDLED:
+		rtn = eh_timed_out(scmd);
+		switch (rtn) {
+		case BLK_EH_NOT_HANDLED:
 			break;
+		default:
+			return rtn;
 		}
 
 	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
 		scmd->result |= DID_TIME_OUT << 16;
-		__scsi_done(scmd);
+		return BLK_EH_HANDLED;
 	}
+
+	return BLK_EH_NOT_HANDLED;
 }
 
 /**
@@ -1793,7 +1732,6 @@
 
 	blk_rq_init(NULL, &req);
 	scmd->request = &req;
-	memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
 
 	scmd->cmnd = req.cmd;
 
@@ -1804,8 +1742,6 @@
 
 	scmd->sc_data_direction		= DMA_BIDIRECTIONAL;
 
-	init_timer(&scmd->eh_timeout);
-
 	spin_lock_irqsave(shost->host_lock, flags);
 	shost->tmf_in_progress = 1;
 	spin_unlock_irqrestore(shost->host_lock, flags);

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d2884bf..98ee55c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c

@@ -1181,7 +1181,6 @@
 	
 	cmd->transfersize = req->data_len;
 	cmd->allowed = req->retries;
-	cmd->timeout_per_command = req->timeout;
 	return BLKPREP_OK;
 }
 EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
@@ -1417,17 +1416,26 @@
 	spin_unlock(shost->host_lock);
 	spin_lock(sdev->request_queue->queue_lock);
 
-	__scsi_done(cmd);
+	blk_complete_request(req);
 }
 
 static void scsi_softirq_done(struct request *rq)
 {
-	struct scsi_cmnd *cmd = rq->completion_data;
-	unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command;
+	struct scsi_cmnd *cmd = rq->special;
+	unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
 	int disposition;
 
 	INIT_LIST_HEAD(&cmd->eh_entry);
 
+	/*
+	 * Set the serial numbers back to zero
+	 */
+	cmd->serial_number = 0;
+
+	atomic_inc(&cmd->device->iodone_cnt);
+	if (cmd->result)
+		atomic_inc(&cmd->device->ioerr_cnt);
+
 	disposition = scsi_decide_disposition(cmd);
 	if (disposition != SUCCESS &&
 	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
@@ -1676,6 +1684,7 @@
 
 	blk_queue_prep_rq(q, scsi_prep_fn);
 	blk_queue_softirq_done(q, scsi_softirq_done);
+	blk_queue_rq_timed_out(q, scsi_times_out);
 	return q;
 }
 

diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 79f0f75..6cddd5d 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h

@@ -4,6 +4,7 @@
 #include <linux/device.h>
 
 struct request_queue;
+struct request;
 struct scsi_cmnd;
 struct scsi_device;
 struct scsi_host_template;
@@ -27,7 +28,6 @@
 extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd);
 extern int scsi_setup_command_freelist(struct Scsi_Host *shost);
 extern void scsi_destroy_command_freelist(struct Scsi_Host *shost);
-extern void __scsi_done(struct scsi_cmnd *cmd);
 #ifdef CONFIG_SCSI_LOGGING
 void scsi_log_send(struct scsi_cmnd *cmd);
 void scsi_log_completion(struct scsi_cmnd *cmd, int disposition);
@@ -49,10 +49,7 @@
 extern void scsi_exit_devinfo(void);
 
 /* scsi_error.c */
-extern void scsi_add_timer(struct scsi_cmnd *, int,
-		void (*)(struct scsi_cmnd *));
-extern int scsi_delete_timer(struct scsi_cmnd *);
-extern void scsi_times_out(struct scsi_cmnd *cmd);
+extern enum blk_eh_timer_return scsi_times_out(struct request *req);
 extern int scsi_error_handler(void *host);
 extern int scsi_decide_disposition(struct scsi_cmnd *cmd);
 extern void scsi_eh_wakeup(struct Scsi_Host *shost);

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 09d311d..93c28f3 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c

@@ -561,12 +561,15 @@
 sdev_rd_attr (model, "%.16s\n");
 sdev_rd_attr (rev, "%.4s\n");
 
+/*
+ * TODO: can we make these symlinks to the block layer ones?
+ */
 static ssize_t
 sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct scsi_device *sdev;
 	sdev = to_scsi_device(dev);
-	return snprintf (buf, 20, "%d\n", sdev->timeout / HZ);
+	return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ);
 }
 
 static ssize_t
@@ -577,7 +580,7 @@
 	int timeout;
 	sdev = to_scsi_device(dev);
 	sscanf (buf, "%d\n", &timeout);
-	sdev->timeout = timeout * HZ;
+	blk_queue_rq_timeout(sdev->request_queue, timeout * HZ);
 	return count;
 }
 static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout);

diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index f26299d..48ba413 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c

@@ -362,7 +362,7 @@
 	int err;
 
 	dprintk("%lx %u\n", uaddr, len);
-	err = blk_rq_map_user(q, rq, (void *)uaddr, len);
+	err = blk_rq_map_user(q, rq, NULL, (void *)uaddr, len, GFP_KERNEL);
 	if (err) {
 		/*
 		 * TODO: need to fixup sg_tablesize, max_segment_size,

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index cb971f0..d5f7653 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c

@@ -1926,15 +1926,15 @@
  * Notes:
  *	This routine assumes no locks are held on entry.
  */
-static enum scsi_eh_timer_return
+static enum blk_eh_timer_return
 fc_timed_out(struct scsi_cmnd *scmd)
 {
 	struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device));
 
 	if (rport->port_state == FC_PORTSTATE_BLOCKED)
-		return EH_RESET_TIMER;
+		return BLK_EH_RESET_TIMER;
 
-	return EH_NOT_HANDLED;
+	return BLK_EH_NOT_HANDLED;
 }
 
 /*

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index af9e406..a7b53be 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c

@@ -87,6 +87,12 @@
 MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
 MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
 
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
+#define SD_MINORS	16
+#else
+#define SD_MINORS	0
+#endif
+
 static int  sd_revalidate_disk(struct gendisk *);
 static int  sd_probe(struct device *);
 static int  sd_remove(struct device *);
@@ -160,7 +166,7 @@
 			sd_print_sense_hdr(sdkp, &sshdr);
 		return -EINVAL;
 	}
-	sd_revalidate_disk(sdkp->disk);
+	revalidate_disk(sdkp->disk);
 	return count;
 }
 
@@ -378,7 +384,6 @@
 	sector_t block = rq->sector;
 	sector_t threshold;
 	unsigned int this_count = rq->nr_sectors;
-	unsigned int timeout = sdp->timeout;
 	int ret;
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -579,7 +584,6 @@
 	SCpnt->transfersize = sdp->sector_size;
 	SCpnt->underflow = this_count << 9;
 	SCpnt->allowed = SD_MAX_RETRIES;
-	SCpnt->timeout_per_command = timeout;
 
 	/*
 	 * This indicates that the command is ready from our end to be
@@ -911,7 +915,7 @@
 	struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
 
 	if (sdkp) {
-		sd_revalidate_disk(sdkp->disk);
+		revalidate_disk(sdkp->disk);
 		scsi_disk_put(sdkp);
 	}
 }
@@ -1759,6 +1763,52 @@
 }
 
 /**
+ *	sd_format_disk_name - format disk name
+ *	@prefix: name prefix - ie. "sd" for SCSI disks
+ *	@index: index of the disk to format name for
+ *	@buf: output buffer
+ *	@buflen: length of the output buffer
+ *
+ *	SCSI disk names start at sda.  The 26th device is sdz and the
+ *	27th is sdaa.  The last one for two lettered suffix is sdzz
+ *	which is followed by sdaaa.
+ *
+ *	This is basically base-26 counting with one extra 'nil' entry
+ *	at the beginning from the second digit on, and can be
+ *	determined using a method similar to base-26 conversion, with
+ *	the index shifted by -1 after each digit is computed.
+ *
+ *	CONTEXT:
+ *	Don't care.
+ *
+ *	RETURNS:
+ *	0 on success, -errno on failure.
+ */
+static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen)
+{
+	const int base = 'z' - 'a' + 1;
+	char *begin = buf + strlen(prefix);
+	char *end = buf + buflen;
+	char *p;
+	int unit;
+
+	p = end - 1;
+	*p = '\0';
+	unit = base;
+	do {
+		if (p == begin)
+			return -EINVAL;
+		*--p = 'a' + (index % unit);
+		index = (index / unit) - 1;
+	} while (index >= 0);
+
+	memmove(begin, p, end - p);
+	memcpy(buf, prefix, strlen(prefix));
+
+	return 0;
+}
+
+/**
  *	sd_probe - called during driver initialization and whenever a
  *	new scsi device is attached to the system. It is called once
  *	for each scsi device (not just disks) present.
@@ -1796,7 +1846,7 @@
 	if (!sdkp)
 		goto out;
 
-	gd = alloc_disk(16);
+	gd = alloc_disk(SD_MINORS);
 	if (!gd)
 		goto out_free;
 
@@ -1810,8 +1860,8 @@
 	if (error)
 		goto out_put;
 
-	error = -EBUSY;
-	if (index >= SD_MAX_DISKS)
+	error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
+	if (error)
 		goto out_free_index;
 
 	sdkp->device = sdp;
@@ -1821,11 +1871,12 @@
 	sdkp->openers = 0;
 	sdkp->previous_state = 1;
 
-	if (!sdp->timeout) {
+	if (!sdp->request_queue->rq_timeout) {
 		if (sdp->type != TYPE_MOD)
-			sdp->timeout = SD_TIMEOUT;
+			blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
 		else
-			sdp->timeout = SD_MOD_TIMEOUT;
+			blk_queue_rq_timeout(sdp->request_queue,
+					     SD_MOD_TIMEOUT);
 	}
 
 	device_initialize(&sdkp->dev);
@@ -1838,24 +1889,12 @@
 
 	get_device(&sdp->sdev_gendev);
 
-	gd->major = sd_major((index & 0xf0) >> 4);
-	gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
-	gd->minors = 16;
-	gd->fops = &sd_fops;
-
-	if (index < 26) {
-		sprintf(gd->disk_name, "sd%c", 'a' + index % 26);
-	} else if (index < (26 + 1) * 26) {
-		sprintf(gd->disk_name, "sd%c%c",
-			'a' + index / 26 - 1,'a' + index % 26);
-	} else {
-		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
-		const unsigned int m2 = (index / 26 - 1) % 26;
-		const unsigned int m3 =  index % 26;
-		sprintf(gd->disk_name, "sd%c%c%c",
-			'a' + m1, 'a' + m2, 'a' + m3);
+	if (index < SD_MAX_DISKS) {
+		gd->major = sd_major((index & 0xf0) >> 4);
+		gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
+		gd->minors = SD_MINORS;
 	}
-
+	gd->fops = &sd_fops;
 	gd->private_data = &sdkp->driver;
 	gd->queue = sdkp->device->request_queue;
 
@@ -1864,7 +1903,7 @@
 	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
 
 	gd->driverfs_dev = &sdp->sdev_gendev;
-	gd->flags = GENHD_FL_DRIVERFS;
+	gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
 	if (sdp->removable)
 		gd->flags |= GENHD_FL_REMOVABLE;
 

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 661f9f2..ba9b9bb 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c

@@ -47,7 +47,6 @@
 #include <linux/seq_file.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
-#include <linux/scatterlist.h>
 #include <linux/blktrace_api.h>
 #include <linux/smp_lock.h>
 
@@ -69,7 +68,6 @@
 #endif
 
 #define SG_ALLOW_DIO_DEF 0
-#define SG_ALLOW_DIO_CODE /* compile out by commenting this define */
 
 #define SG_MAX_DEVS 32768
 
@@ -118,8 +116,8 @@
 	unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */
 	unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */
 	unsigned bufflen;	/* Size of (aggregate) data buffer */
-	unsigned b_malloc_len;	/* actual len malloc'ed in buffer */
-	struct scatterlist *buffer;/* scatter list */
+	struct page **pages;
+	int page_order;
 	char dio_in_use;	/* 0->indirect IO (or mmap), 1->dio */
 	unsigned char cmd_opcode; /* first byte of command */
 } Sg_scatter_hold;
@@ -137,6 +135,8 @@
 	char orphan;		/* 1 -> drop on sight, 0 -> normal */
 	char sg_io_owned;	/* 1 -> packet belongs to SG_IO */
 	volatile char done;	/* 0->before bh, 1->before read, 2->read */
+	struct request *rq;
+	struct bio *bio;
 } Sg_request;
 
 typedef struct sg_fd {		/* holds the state of a file descriptor */
@@ -175,8 +175,8 @@
 
 static int sg_fasync(int fd, struct file *filp, int mode);
 /* tasklet or soft irq callback */
-static void sg_cmd_done(void *data, char *sense, int result, int resid);
-static int sg_start_req(Sg_request * srp);
+static void sg_rq_end_io(struct request *rq, int uptodate);
+static int sg_start_req(Sg_request *srp, unsigned char *cmd);
 static void sg_finish_rem_req(Sg_request * srp);
 static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
 static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp,
@@ -188,17 +188,11 @@
 			int read_only, Sg_request **o_srp);
 static int sg_common_write(Sg_fd * sfp, Sg_request * srp,
 			   unsigned char *cmnd, int timeout, int blocking);
-static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
-		      int wr_xf, int *countp, unsigned char __user **up);
-static int sg_write_xfer(Sg_request * srp);
-static int sg_read_xfer(Sg_request * srp);
 static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer);
 static void sg_remove_scat(Sg_scatter_hold * schp);
 static void sg_build_reserve(Sg_fd * sfp, int req_size);
 static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
 static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
-static struct page *sg_page_malloc(int rqSz, int lowDma, int *retSzp);
-static void sg_page_free(struct page *page, int size);
 static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev);
 static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp);
 static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp);
@@ -206,7 +200,6 @@
 static Sg_request *sg_add_request(Sg_fd * sfp);
 static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
 static int sg_res_in_use(Sg_fd * sfp);
-static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len);
 static Sg_device *sg_get_dev(int dev);
 #ifdef CONFIG_SCSI_PROC_FS
 static int sg_last_dev(void);
@@ -529,8 +522,7 @@
 		err = -EFAULT;
 		goto err_out;
 	}
-	err = sg_read_xfer(srp);
-      err_out:
+err_out:
 	sg_finish_rem_req(srp);
 	return (0 == err) ? count : err;
 }
@@ -612,7 +604,10 @@
 	else
 		hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE;
 	hp->dxfer_len = mxsize;
-	hp->dxferp = (char __user *)buf + cmd_size;
+	if (hp->dxfer_direction == SG_DXFER_TO_DEV)
+		hp->dxferp = (char __user *)buf + cmd_size;
+	else
+		hp->dxferp = NULL;
 	hp->sbp = NULL;
 	hp->timeout = old_hdr.reply_len;	/* structure abuse ... */
 	hp->flags = input_size;	/* structure abuse ... */
@@ -732,16 +727,12 @@
 	SCSI_LOG_TIMEOUT(4, printk("sg_common_write:  scsi opcode=0x%02x, cmd_size=%d\n",
 			  (int) cmnd[0], (int) hp->cmd_len));
 
-	if ((k = sg_start_req(srp))) {
+	k = sg_start_req(srp, cmnd);
+	if (k) {
 		SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", k));
 		sg_finish_rem_req(srp);
 		return k;	/* probably out of space --> ENOMEM */
 	}
-	if ((k = sg_write_xfer(srp))) {
-		SCSI_LOG_TIMEOUT(1, printk("sg_common_write: write_xfer, bad address\n"));
-		sg_finish_rem_req(srp);
-		return k;
-	}
 	if (sdp->detached) {
 		sg_finish_rem_req(srp);
 		return -ENODEV;
@@ -763,20 +754,11 @@
 		break;
 	}
 	hp->duration = jiffies_to_msecs(jiffies);
-/* Now send everything of to mid-level. The next time we hear about this
-   packet is when sg_cmd_done() is called (i.e. a callback). */
-	if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer,
-				hp->dxfer_len, srp->data.k_use_sg, timeout,
-				SG_DEFAULT_RETRIES, srp, sg_cmd_done,
-				GFP_ATOMIC)) {
-		SCSI_LOG_TIMEOUT(1, printk("sg_common_write: scsi_execute_async failed\n"));
-		/*
-		 * most likely out of mem, but could also be a bad map
-		 */
-		sg_finish_rem_req(srp);
-		return -ENOMEM;
-	} else
-		return 0;
+
+	srp->rq->timeout = timeout;
+	blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk,
+			      srp->rq, 1, sg_rq_end_io);
+	return 0;
 }
 
 static int
@@ -1192,8 +1174,7 @@
 	Sg_fd *sfp;
 	unsigned long offset, len, sa;
 	Sg_scatter_hold *rsv_schp;
-	struct scatterlist *sg;
-	int k;
+	int k, length;
 
 	if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data)))
 		return VM_FAULT_SIGBUS;
@@ -1203,15 +1184,14 @@
 		return VM_FAULT_SIGBUS;
 	SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n",
 				   offset, rsv_schp->k_use_sg));
-	sg = rsv_schp->buffer;
 	sa = vma->vm_start;
-	for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
-	     ++k, sg = sg_next(sg)) {
+	length = 1 << (PAGE_SHIFT + rsv_schp->page_order);
+	for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) {
 		len = vma->vm_end - sa;
-		len = (len < sg->length) ? len : sg->length;
+		len = (len < length) ? len : length;
 		if (offset < len) {
-			struct page *page;
-			page = virt_to_page(page_address(sg_page(sg)) + offset);
+			struct page *page = nth_page(rsv_schp->pages[k],
+						     offset >> PAGE_SHIFT);
 			get_page(page);	/* increment page count */
 			vmf->page = page;
 			return 0; /* success */
@@ -1233,8 +1213,7 @@
 	Sg_fd *sfp;
 	unsigned long req_sz, len, sa;
 	Sg_scatter_hold *rsv_schp;
-	int k;
-	struct scatterlist *sg;
+	int k, length;
 
 	if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data)))
 		return -ENXIO;
@@ -1248,11 +1227,10 @@
 		return -ENOMEM;	/* cannot map more than reserved buffer */
 
 	sa = vma->vm_start;
-	sg = rsv_schp->buffer;
-	for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
-	     ++k, sg = sg_next(sg)) {
+	length = 1 << (PAGE_SHIFT + rsv_schp->page_order);
+	for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) {
 		len = vma->vm_end - sa;
-		len = (len < sg->length) ? len : sg->length;
+		len = (len < length) ? len : length;
 		sa += len;
 	}
 
@@ -1263,16 +1241,19 @@
 	return 0;
 }
 
-/* This function is a "bottom half" handler that is called by the
- * mid level when a command is completed (or has failed). */
-static void
-sg_cmd_done(void *data, char *sense, int result, int resid)
+/*
+ * This function is a "bottom half" handler that is called by the mid
+ * level when a command is completed (or has failed).
+ */
+static void sg_rq_end_io(struct request *rq, int uptodate)
 {
-	Sg_request *srp = data;
+	struct sg_request *srp = rq->end_io_data;
 	Sg_device *sdp = NULL;
 	Sg_fd *sfp;
 	unsigned long iflags;
 	unsigned int ms;
+	char *sense;
+	int result, resid;
 
 	if (NULL == srp) {
 		printk(KERN_ERR "sg_cmd_done: NULL request\n");
@@ -1286,6 +1267,9 @@
 		return;
 	}
 
+	sense = rq->sense;
+	result = rq->errors;
+	resid = rq->data_len;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n",
 		sdp->disk->disk_name, srp->header.pack_id, result));
@@ -1296,7 +1280,6 @@
 	if (0 != result) {
 		struct scsi_sense_hdr sshdr;
 
-		memcpy(srp->sense_b, sense, sizeof (srp->sense_b));
 		srp->header.status = 0xff & result;
 		srp->header.masked_status = status_byte(result);
 		srp->header.msg_status = msg_byte(result);
@@ -1634,37 +1617,79 @@
 	idr_destroy(&sg_index_idr);
 }
 
-static int
-sg_start_req(Sg_request * srp)
+static int sg_start_req(Sg_request *srp, unsigned char *cmd)
 {
 	int res;
+	struct request *rq;
 	Sg_fd *sfp = srp->parentfp;
 	sg_io_hdr_t *hp = &srp->header;
 	int dxfer_len = (int) hp->dxfer_len;
 	int dxfer_dir = hp->dxfer_direction;
+	unsigned int iov_count = hp->iovec_count;
 	Sg_scatter_hold *req_schp = &srp->data;
 	Sg_scatter_hold *rsv_schp = &sfp->reserve;
+	struct request_queue *q = sfp->parentdp->device->request_queue;
+	struct rq_map_data *md, map_data;
+	int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? WRITE : READ;
 
-	SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len));
+	SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n",
+				   dxfer_len));
+
+	rq = blk_get_request(q, rw, GFP_ATOMIC);
+	if (!rq)
+		return -ENOMEM;
+
+	memcpy(rq->cmd, cmd, hp->cmd_len);
+
+	rq->cmd_len = hp->cmd_len;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+
+	srp->rq = rq;
+	rq->end_io_data = srp;
+	rq->sense = srp->sense_b;
+	rq->retries = SG_DEFAULT_RETRIES;
+
 	if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE))
 		return 0;
-	if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) &&
-	    (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) &&
-	    (!sfp->parentdp->device->host->unchecked_isa_dma)) {
-		res = sg_build_direct(srp, sfp, dxfer_len);
-		if (res <= 0)	/* -ve -> error, 0 -> done, 1 -> try indirect */
-			return res;
+
+	if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO &&
+	    dxfer_dir != SG_DXFER_UNKNOWN && !iov_count &&
+	    !sfp->parentdp->device->host->unchecked_isa_dma &&
+	    blk_rq_aligned(q, hp->dxferp, dxfer_len))
+		md = NULL;
+	else
+		md = &map_data;
+
+	if (md) {
+		if (!sg_res_in_use(sfp) && dxfer_len <= rsv_schp->bufflen)
+			sg_link_reserve(sfp, srp, dxfer_len);
+		else {
+			res = sg_build_indirect(req_schp, sfp, dxfer_len);
+			if (res)
+				return res;
+		}
+
+		md->pages = req_schp->pages;
+		md->page_order = req_schp->page_order;
+		md->nr_entries = req_schp->k_use_sg;
 	}
-	if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen))
-		sg_link_reserve(sfp, srp, dxfer_len);
-	else {
-		res = sg_build_indirect(req_schp, sfp, dxfer_len);
-		if (res) {
-			sg_remove_scat(req_schp);
-			return res;
+
+	if (iov_count)
+		res = blk_rq_map_user_iov(q, rq, md, hp->dxferp, iov_count,
+					  hp->dxfer_len, GFP_ATOMIC);
+	else
+		res = blk_rq_map_user(q, rq, md, hp->dxferp,
+				      hp->dxfer_len, GFP_ATOMIC);
+
+	if (!res) {
+		srp->bio = rq->bio;
+
+		if (!md) {
+			req_schp->dio_in_use = 1;
+			hp->info |= SG_INFO_DIRECT_IO;
 		}
 	}
-	return 0;
+	return res;
 }
 
 static void
@@ -1678,186 +1703,37 @@
 		sg_unlink_reserve(sfp, srp);
 	else
 		sg_remove_scat(req_schp);
+
+	if (srp->rq) {
+		if (srp->bio)
+			blk_rq_unmap_user(srp->bio);
+
+		blk_put_request(srp->rq);
+	}
+
 	sg_remove_request(sfp, srp);
 }
 
 static int
 sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize)
 {
-	int sg_bufflen = tablesize * sizeof(struct scatterlist);
+	int sg_bufflen = tablesize * sizeof(struct page *);
 	gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
 
-	/*
-	 * TODO: test without low_dma, we should not need it since
-	 * the block layer will bounce the buffer for us
-	 *
-	 * XXX(hch): we shouldn't need GFP_DMA for the actual S/G list.
-	 */
-	if (sfp->low_dma)
-		 gfp_flags |= GFP_DMA;
-	schp->buffer = kzalloc(sg_bufflen, gfp_flags);
-	if (!schp->buffer)
+	schp->pages = kzalloc(sg_bufflen, gfp_flags);
+	if (!schp->pages)
 		return -ENOMEM;
-	sg_init_table(schp->buffer, tablesize);
 	schp->sglist_len = sg_bufflen;
 	return tablesize;	/* number of scat_gath elements allocated */
 }
 
-#ifdef SG_ALLOW_DIO_CODE
-/* vvvvvvvv  following code borrowed from st driver's direct IO vvvvvvvvv */
-	/* TODO: hopefully we can use the generic block layer code */
-
-/* Pin down user pages and put them into a scatter gather list. Returns <= 0 if
-   - mapping of all pages not successful
-   (i.e., either completely successful or fails)
-*/
-static int 
-st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, 
-	          unsigned long uaddr, size_t count, int rw)
-{
-	unsigned long end = (uaddr + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned long start = uaddr >> PAGE_SHIFT;
-	const int nr_pages = end - start;
-	int res, i, j;
-	struct page **pages;
-
-	/* User attempted Overflow! */
-	if ((uaddr + count) < uaddr)
-		return -EINVAL;
-
-	/* Too big */
-        if (nr_pages > max_pages)
-		return -ENOMEM;
-
-	/* Hmm? */
-	if (count == 0)
-		return 0;
-
-	if ((pages = kmalloc(max_pages * sizeof(*pages), GFP_ATOMIC)) == NULL)
-		return -ENOMEM;
-
-        /* Try to fault in all of the necessary pages */
-	down_read(&current->mm->mmap_sem);
-        /* rw==READ means read from drive, write into memory area */
-	res = get_user_pages(
-		current,
-		current->mm,
-		uaddr,
-		nr_pages,
-		rw == READ,
-		0, /* don't force */
-		pages,
-		NULL);
-	up_read(&current->mm->mmap_sem);
-
-	/* Errors and no page mapped should return here */
-	if (res < nr_pages)
-		goto out_unmap;
-
-        for (i=0; i < nr_pages; i++) {
-                /* FIXME: flush superflous for rw==READ,
-                 * probably wrong function for rw==WRITE
-                 */
-		flush_dcache_page(pages[i]);
-		/* ?? Is locking needed? I don't think so */
-		/* if (!trylock_page(pages[i]))
-		   goto out_unlock; */
-        }
-
-	sg_set_page(sgl, pages[0], 0, uaddr & ~PAGE_MASK);
-	if (nr_pages > 1) {
-		sgl[0].length = PAGE_SIZE - sgl[0].offset;
-		count -= sgl[0].length;
-		for (i=1; i < nr_pages ; i++)
-			sg_set_page(&sgl[i], pages[i], count < PAGE_SIZE ? count : PAGE_SIZE, 0);
-	}
-	else {
-		sgl[0].length = count;
-	}
-
-	kfree(pages);
-	return nr_pages;
-
- out_unmap:
-	if (res > 0) {
-		for (j=0; j < res; j++)
-			page_cache_release(pages[j]);
-		res = 0;
-	}
-	kfree(pages);
-	return res;
-}
-
-
-/* And unmap them... */
-static int 
-st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
-		    int dirtied)
-{
-	int i;
-
-	for (i=0; i < nr_pages; i++) {
-		struct page *page = sg_page(&sgl[i]);
-
-		if (dirtied)
-			SetPageDirty(page);
-		/* unlock_page(page); */
-		/* FIXME: cache flush missing for rw==READ
-		 * FIXME: call the correct reference counting function
-		 */
-		page_cache_release(page);
-	}
-
-	return 0;
-}
-
-/* ^^^^^^^^  above code borrowed from st driver's direct IO ^^^^^^^^^ */
-#endif
-
-
-/* Returns: -ve -> error, 0 -> done, 1 -> try indirect */
-static int
-sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len)
-{
-#ifdef SG_ALLOW_DIO_CODE
-	sg_io_hdr_t *hp = &srp->header;
-	Sg_scatter_hold *schp = &srp->data;
-	int sg_tablesize = sfp->parentdp->sg_tablesize;
-	int mx_sc_elems, res;
-	struct scsi_device *sdev = sfp->parentdp->device;
-
-	if (((unsigned long)hp->dxferp &
-			queue_dma_alignment(sdev->request_queue)) != 0)
-		return 1;
-
-	mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize);
-        if (mx_sc_elems <= 0) {
-                return 1;
-        }
-	res = st_map_user_pages(schp->buffer, mx_sc_elems,
-				(unsigned long)hp->dxferp, dxfer_len, 
-				(SG_DXFER_TO_DEV == hp->dxfer_direction) ? 1 : 0);
-	if (res <= 0) {
-		sg_remove_scat(schp);
-		return 1;
-	}
-	schp->k_use_sg = res;
-	schp->dio_in_use = 1;
-	hp->info |= SG_INFO_DIRECT_IO;
-	return 0;
-#else
-	return 1;
-#endif
-}
-
 static int
 sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
 {
-	struct scatterlist *sg;
-	int ret_sz = 0, k, rem_sz, num, mx_sc_elems;
+	int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems;
 	int sg_tablesize = sfp->parentdp->sg_tablesize;
-	int blk_size = buff_size;
-	struct page *p = NULL;
+	int blk_size = buff_size, order;
+	gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
 
 	if (blk_size < 0)
 		return -EFAULT;
@@ -1881,15 +1757,26 @@
 		} else
 			scatter_elem_sz_prev = num;
 	}
-	for (k = 0, sg = schp->buffer, rem_sz = blk_size;
-	     (rem_sz > 0) && (k < mx_sc_elems);
-	     ++k, rem_sz -= ret_sz, sg = sg_next(sg)) {
-		
+
+	if (sfp->low_dma)
+		gfp_mask |= GFP_DMA;
+
+	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+		gfp_mask |= __GFP_ZERO;
+
+	order = get_order(num);
+retry:
+	ret_sz = 1 << (PAGE_SHIFT + order);
+
+	for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems;
+	     k++, rem_sz -= ret_sz) {
+
 		num = (rem_sz > scatter_elem_sz_prev) ?
-		      scatter_elem_sz_prev : rem_sz;
-		p = sg_page_malloc(num, sfp->low_dma, &ret_sz);
-		if (!p)
-			return -ENOMEM;
+			scatter_elem_sz_prev : rem_sz;
+
+		schp->pages[k] = alloc_pages(gfp_mask, order);
+		if (!schp->pages[k])
+			goto out;
 
 		if (num == scatter_elem_sz_prev) {
 			if (unlikely(ret_sz > scatter_elem_sz_prev)) {
@@ -1897,12 +1784,12 @@
 				scatter_elem_sz_prev = ret_sz;
 			}
 		}
-		sg_set_page(sg, p, (ret_sz > num) ? num : ret_sz, 0);
 
 		SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, "
 				 "ret_sz=%d\n", k, num, ret_sz));
 	}		/* end of for loop */
 
+	schp->page_order = order;
 	schp->k_use_sg = k;
 	SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, "
 			 "rem_sz=%d\n", k, rem_sz));
@@ -1910,223 +1797,42 @@
 	schp->bufflen = blk_size;
 	if (rem_sz > 0)	/* must have failed */
 		return -ENOMEM;
-
 	return 0;
-}
+out:	/* alloc_pages() failed for slot k: release blocks 0..k-1 only */
+	for (i = 0; i < k; i++)
+		__free_pages(schp->pages[i], order);
 
-static int
-sg_write_xfer(Sg_request * srp)
-{
-	sg_io_hdr_t *hp = &srp->header;
-	Sg_scatter_hold *schp = &srp->data;
-	struct scatterlist *sg = schp->buffer;
-	int num_xfer = 0;
-	int j, k, onum, usglen, ksglen, res;
-	int iovec_count = (int) hp->iovec_count;
-	int dxfer_dir = hp->dxfer_direction;
-	unsigned char *p;
-	unsigned char __user *up;
-	int new_interface = ('\0' == hp->interface_id) ? 0 : 1;
+	if (--order >= 0)
+		goto retry;
 
-	if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) ||
-	    (SG_DXFER_TO_FROM_DEV == dxfer_dir)) {
-		num_xfer = (int) (new_interface ? hp->dxfer_len : hp->flags);
-		if (schp->bufflen < num_xfer)
-			num_xfer = schp->bufflen;
-	}
-	if ((num_xfer <= 0) || (schp->dio_in_use) ||
-	    (new_interface
-	     && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags)))
-		return 0;
-
-	SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n",
-			  num_xfer, iovec_count, schp->k_use_sg));
-	if (iovec_count) {
-		onum = iovec_count;
-		if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum))
-			return -EFAULT;
-	} else
-		onum = 1;
-
-	ksglen = sg->length;
-	p = page_address(sg_page(sg));
-	for (j = 0, k = 0; j < onum; ++j) {
-		res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up);
-		if (res)
-			return res;
-
-		for (; p; sg = sg_next(sg), ksglen = sg->length,
-		     p = page_address(sg_page(sg))) {
-			if (usglen <= 0)
-				break;
-			if (ksglen > usglen) {
-				if (usglen >= num_xfer) {
-					if (__copy_from_user(p, up, num_xfer))
-						return -EFAULT;
-					return 0;
-				}
-				if (__copy_from_user(p, up, usglen))
-					return -EFAULT;
-				p += usglen;
-				ksglen -= usglen;
-				break;
-			} else {
-				if (ksglen >= num_xfer) {
-					if (__copy_from_user(p, up, num_xfer))
-						return -EFAULT;
-					return 0;
-				}
-				if (__copy_from_user(p, up, ksglen))
-					return -EFAULT;
-				up += ksglen;
-				usglen -= ksglen;
-			}
-			++k;
-			if (k >= schp->k_use_sg)
-				return 0;
-		}
-	}
-
-	return 0;
-}
-
-static int
-sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
-	   int wr_xf, int *countp, unsigned char __user **up)
-{
-	int num_xfer = (int) hp->dxfer_len;
-	unsigned char __user *p = hp->dxferp;
-	int count;
-
-	if (0 == sg_num) {
-		if (wr_xf && ('\0' == hp->interface_id))
-			count = (int) hp->flags;	/* holds "old" input_size */
-		else
-			count = num_xfer;
-	} else {
-		sg_iovec_t iovec;
-		if (__copy_from_user(&iovec, p + ind*SZ_SG_IOVEC, SZ_SG_IOVEC))
-			return -EFAULT;
-		p = iovec.iov_base;
-		count = (int) iovec.iov_len;
-	}
-	if (!access_ok(wr_xf ? VERIFY_READ : VERIFY_WRITE, p, count))
-		return -EFAULT;
-	if (up)
-		*up = p;
-	if (countp)
-		*countp = count;
-	return 0;
+	return -ENOMEM;
 }
 
 static void
 sg_remove_scat(Sg_scatter_hold * schp)
 {
 	SCSI_LOG_TIMEOUT(4, printk("sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg));
-	if (schp->buffer && (schp->sglist_len > 0)) {
-		struct scatterlist *sg = schp->buffer;
-
-		if (schp->dio_in_use) {
-#ifdef SG_ALLOW_DIO_CODE
-			st_unmap_user_pages(sg, schp->k_use_sg, TRUE);
-#endif
-		} else {
+	if (schp->pages && schp->sglist_len > 0) {
+		if (!schp->dio_in_use) {
 			int k;
 
-			for (k = 0; (k < schp->k_use_sg) && sg_page(sg);
-			     ++k, sg = sg_next(sg)) {
+			for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) {
 				SCSI_LOG_TIMEOUT(5, printk(
-				    "sg_remove_scat: k=%d, pg=0x%p, len=%d\n",
-				    k, sg_page(sg), sg->length));
-				sg_page_free(sg_page(sg), sg->length);
+				    "sg_remove_scat: k=%d, pg=0x%p\n",
+				    k, schp->pages[k]));
+				__free_pages(schp->pages[k], schp->page_order);
 			}
+
+			kfree(schp->pages);
 		}
-		kfree(schp->buffer);
 	}
 	memset(schp, 0, sizeof (*schp));
 }
 
 static int
-sg_read_xfer(Sg_request * srp)
-{
-	sg_io_hdr_t *hp = &srp->header;
-	Sg_scatter_hold *schp = &srp->data;
-	struct scatterlist *sg = schp->buffer;
-	int num_xfer = 0;
-	int j, k, onum, usglen, ksglen, res;
-	int iovec_count = (int) hp->iovec_count;
-	int dxfer_dir = hp->dxfer_direction;
-	unsigned char *p;
-	unsigned char __user *up;
-	int new_interface = ('\0' == hp->interface_id) ? 0 : 1;
-
-	if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir)
-	    || (SG_DXFER_TO_FROM_DEV == dxfer_dir)) {
-		num_xfer = hp->dxfer_len;
-		if (schp->bufflen < num_xfer)
-			num_xfer = schp->bufflen;
-	}
-	if ((num_xfer <= 0) || (schp->dio_in_use) ||
-	    (new_interface
-	     && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags)))
-		return 0;
-
-	SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n",
-			  num_xfer, iovec_count, schp->k_use_sg));
-	if (iovec_count) {
-		onum = iovec_count;
-		if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum))
-			return -EFAULT;
-	} else
-		onum = 1;
-
-	p = page_address(sg_page(sg));
-	ksglen = sg->length;
-	for (j = 0, k = 0; j < onum; ++j) {
-		res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up);
-		if (res)
-			return res;
-
-		for (; p; sg = sg_next(sg), ksglen = sg->length,
-		     p = page_address(sg_page(sg))) {
-			if (usglen <= 0)
-				break;
-			if (ksglen > usglen) {
-				if (usglen >= num_xfer) {
-					if (__copy_to_user(up, p, num_xfer))
-						return -EFAULT;
-					return 0;
-				}
-				if (__copy_to_user(up, p, usglen))
-					return -EFAULT;
-				p += usglen;
-				ksglen -= usglen;
-				break;
-			} else {
-				if (ksglen >= num_xfer) {
-					if (__copy_to_user(up, p, num_xfer))
-						return -EFAULT;
-					return 0;
-				}
-				if (__copy_to_user(up, p, ksglen))
-					return -EFAULT;
-				up += ksglen;
-				usglen -= ksglen;
-			}
-			++k;
-			if (k >= schp->k_use_sg)
-				return 0;
-		}
-	}
-
-	return 0;
-}
-
-static int
 sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
 {
 	Sg_scatter_hold *schp = &srp->data;
-	struct scatterlist *sg = schp->buffer;
 	int k, num;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_read_oxfer: num_read_xfer=%d\n",
@@ -2134,15 +1840,15 @@
 	if ((!outp) || (num_read_xfer <= 0))
 		return 0;
 
-	for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) {
-		num = sg->length;
+	num = 1 << (PAGE_SHIFT + schp->page_order);
+	for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) {
 		if (num > num_read_xfer) {
-			if (__copy_to_user(outp, page_address(sg_page(sg)),
+			if (__copy_to_user(outp, page_address(schp->pages[k]),
 					   num_read_xfer))
 				return -EFAULT;
 			break;
 		} else {
-			if (__copy_to_user(outp, page_address(sg_page(sg)),
+			if (__copy_to_user(outp, page_address(schp->pages[k]),
 					   num))
 				return -EFAULT;
 			num_read_xfer -= num;
@@ -2177,24 +1883,21 @@
 {
 	Sg_scatter_hold *req_schp = &srp->data;
 	Sg_scatter_hold *rsv_schp = &sfp->reserve;
-	struct scatterlist *sg = rsv_schp->buffer;
 	int k, num, rem;
 
 	srp->res_used = 1;
 	SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size));
 	rem = size;
 
-	for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) {
-		num = sg->length;
+	num = 1 << (PAGE_SHIFT + rsv_schp->page_order);
+	for (k = 0; k < rsv_schp->k_use_sg; k++) {
 		if (rem <= num) {
-			sfp->save_scat_len = num;
-			sg->length = rem;
 			req_schp->k_use_sg = k + 1;
 			req_schp->sglist_len = rsv_schp->sglist_len;
-			req_schp->buffer = rsv_schp->buffer;
+			req_schp->pages = rsv_schp->pages;
 
 			req_schp->bufflen = size;
-			req_schp->b_malloc_len = rsv_schp->b_malloc_len;
+			req_schp->page_order = rsv_schp->page_order;
 			break;
 		} else
 			rem -= num;
@@ -2208,22 +1911,13 @@
 sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
 {
 	Sg_scatter_hold *req_schp = &srp->data;
-	Sg_scatter_hold *rsv_schp = &sfp->reserve;
 
 	SCSI_LOG_TIMEOUT(4, printk("sg_unlink_reserve: req->k_use_sg=%d\n",
 				   (int) req_schp->k_use_sg));
-	if ((rsv_schp->k_use_sg > 0) && (req_schp->k_use_sg > 0)) {
-		struct scatterlist *sg = rsv_schp->buffer;
-
-		if (sfp->save_scat_len > 0)
-			(sg + (req_schp->k_use_sg - 1))->length =
-			    (unsigned) sfp->save_scat_len;
-		else
-			SCSI_LOG_TIMEOUT(1, printk ("sg_unlink_reserve: BAD save_scat_len\n"));
-	}
 	req_schp->k_use_sg = 0;
 	req_schp->bufflen = 0;
-	req_schp->buffer = NULL;
+	req_schp->pages = NULL;
+	req_schp->page_order = 0;
 	req_schp->sglist_len = 0;
 	sfp->save_scat_len = 0;
 	srp->res_used = 0;
@@ -2481,53 +2175,6 @@
 	return srp ? 1 : 0;
 }
 
-/* The size fetched (value output via retSzp) set when non-NULL return */
-static struct page *
-sg_page_malloc(int rqSz, int lowDma, int *retSzp)
-{
-	struct page *resp = NULL;
-	gfp_t page_mask;
-	int order, a_size;
-	int resSz;
-
-	if ((rqSz <= 0) || (NULL == retSzp))
-		return resp;
-
-	if (lowDma)
-		page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN;
-	else
-		page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
-
-	for (order = 0, a_size = PAGE_SIZE; a_size < rqSz;
-	     order++, a_size <<= 1) ;
-	resSz = a_size;		/* rounded up if necessary */
-	resp = alloc_pages(page_mask, order);
-	while ((!resp) && order) {
-		--order;
-		a_size >>= 1;	/* divide by 2, until PAGE_SIZE */
-		resp =  alloc_pages(page_mask, order);	/* try half */
-		resSz = a_size;
-	}
-	if (resp) {
-		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
-			memset(page_address(resp), 0, resSz);
-		*retSzp = resSz;
-	}
-	return resp;
-}
-
-static void
-sg_page_free(struct page *page, int size)
-{
-	int order, a_size;
-
-	if (!page)
-		return;
-	for (order = 0, a_size = PAGE_SIZE; a_size < size;
-	     order++, a_size <<= 1) ;
-	__free_pages(page, order);
-}
-
 #ifdef CONFIG_SCSI_PROC_FS
 static int
 sg_idr_max_id(int id, void *p, void *data)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 27f5bfd..0f17009 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c

@@ -331,7 +331,7 @@
 
 static int sr_prep_fn(struct request_queue *q, struct request *rq)
 {
-	int block=0, this_count, s_size, timeout = SR_TIMEOUT;
+	int block = 0, this_count, s_size;
 	struct scsi_cd *cd;
 	struct scsi_cmnd *SCpnt;
 	struct scsi_device *sdp = q->queuedata;
@@ -461,7 +461,6 @@
 	SCpnt->transfersize = cd->device->sector_size;
 	SCpnt->underflow = this_count << 9;
 	SCpnt->allowed = MAX_RETRIES;
-	SCpnt->timeout_per_command = timeout;
 
 	/*
 	 * This indicates that the command is ready from our end to be
@@ -620,6 +619,8 @@
 	disk->fops = &sr_bdops;
 	disk->flags = GENHD_FL_CD;
 
+	blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
+
 	cd->device = sdev;
 	cd->disk = disk;
 	cd->driver = &sr_template;
@@ -878,7 +879,7 @@
 	struct gendisk *disk = cd->disk;
 
 	spin_lock(&sr_index_lock);
-	clear_bit(disk->first_minor, sr_index_bits);
+	clear_bit(MINOR(disk_devt(disk)), sr_index_bits);
 	spin_unlock(&sr_index_lock);
 
 	unregister_cdrom(&cd->cdi);

diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index d39107b..f4e6cde 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c

@@ -519,8 +519,8 @@
 	 *  Shorten our settle_time if needed for 
 	 *  this command not to time out.
 	 */
-	if (np->s.settle_time_valid && cmd->timeout_per_command) {
-		unsigned long tlimit = jiffies + cmd->timeout_per_command;
+	if (np->s.settle_time_valid && cmd->request->timeout) {
+		unsigned long tlimit = jiffies + cmd->request->timeout;
 		tlimit -= SYM_CONF_TIMER_INTERVAL*2;
 		if (time_after(np->s.settle_time, tlimit)) {
 			np->s.settle_time = tlimit;

diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c
index c4eaacd..b872bfa 100644
--- a/drivers/spi/orion_spi.c
+++ b/drivers/spi/orion_spi.c

@@ -427,7 +427,7 @@
 			goto msg_rejected;
 		}
 
-		if (t->speed_hz < orion_spi->min_speed) {
+		if (t->speed_hz && t->speed_hz < orion_spi->min_speed) {
 			dev_err(&spi->dev,
 				"message rejected : "
 				"device min speed (%d Hz) exceeds "

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index c6299e8..9cbff84 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c

@@ -2400,11 +2400,15 @@
 
  	if (!fbcon_is_inactive(vc, info)) {
 		if (ops->blank_state != blank) {
+			int ret = 1;
+
 			ops->blank_state = blank;
 			fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW);
 			ops->cursor_flash = (!blank);
 
-			if (fb_blank(info, blank))
+			if (info->fbops->fb_blank)
+				ret = info->fbops->fb_blank(blank, info);
+			if (ret)
 				fbcon_generic_blank(vc, info, blank);
 		}
 

diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c3e174b..19caf7c 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c

@@ -107,7 +107,8 @@
 	BUG_ON(bip == NULL);
 
 	/* A cloned bio doesn't own the integrity metadata */
-	if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
+	if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
+	    && bip->bip_buf != NULL)
 		kfree(bip->bip_buf);
 
 	mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
@@ -150,6 +151,24 @@
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
+/* Return 1 if @bdev has an integrity profile usable for @rw, else 0. */
+static int bdev_integrity_enabled(struct block_device *bdev, int rw)
+{
+	struct blk_integrity *prof = bdev_get_integrity(bdev);
+
+	if (!prof)
+		return 0;
+
+	switch (rw) {
+	case READ:	/* needs a verify hook and the READ flag */
+		return prof->verify_fn && (prof->flags & INTEGRITY_FLAG_READ);
+	case WRITE:	/* needs a generate hook and the WRITE flag */
+		return prof->generate_fn &&
+		       (prof->flags & INTEGRITY_FLAG_WRITE);
+	}
+	return 0;
+}
+
 /**
  * bio_integrity_enabled - Check whether integrity can be passed
  * @bio:	bio to check
@@ -313,6 +332,14 @@
 	}
 }
 
+/* Tuple size recorded in @bi, or 0 when no profile is supplied. */
+static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
+{
+	if (!bi)
+		return 0;
+	return bi->tuple_size;
+}
+
 /**
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare

diff --git a/fs/bio.c b/fs/bio.c
index 3cba7ae..77a55bc 100644
--- a/fs/bio.c
+++ b/fs/bio.c

@@ -30,7 +30,7 @@
 
 static struct kmem_cache *bio_slab __read_mostly;
 
-mempool_t *bio_split_pool __read_mostly;
+static mempool_t *bio_split_pool __read_mostly;
 
 /*
  * if you change this list, also change bvec_alloc or things will
@@ -60,25 +60,46 @@
 	struct bio_vec *bvl;
 
 	/*
-	 * see comment near bvec_array define!
+	 * If 'bs' is given, lookup the pool and do the mempool alloc.
+	 * If not, this is a bio_kmalloc() allocation and just do a
+	 * kzalloc() for the exact number of vecs right away.
 	 */
-	switch (nr) {
-		case   1        : *idx = 0; break;
-		case   2 ...   4: *idx = 1; break;
-		case   5 ...  16: *idx = 2; break;
-		case  17 ...  64: *idx = 3; break;
-		case  65 ... 128: *idx = 4; break;
-		case 129 ... BIO_MAX_PAGES: *idx = 5; break;
+	if (bs) {
+		/*
+		 * see comment near bvec_array define!
+		 */
+		switch (nr) {
+		case 1:
+			*idx = 0;
+			break;
+		case 2 ... 4:
+			*idx = 1;
+			break;
+		case 5 ... 16:
+			*idx = 2;
+			break;
+		case 17 ... 64:
+			*idx = 3;
+			break;
+		case 65 ... 128:
+			*idx = 4;
+			break;
+		case 129 ... BIO_MAX_PAGES:
+			*idx = 5;
+			break;
 		default:
 			return NULL;
-	}
-	/*
-	 * idx now points to the pool we want to allocate from
-	 */
+		}
 
-	bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-	if (bvl)
-		memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+		/*
+		 * idx now points to the pool we want to allocate from
+		 */
+		bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
+		if (bvl)
+			memset(bvl, 0,
+				bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+	} else
+		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
 
 	return bvl;
 }
@@ -107,10 +128,17 @@
 	bio_free(bio, fs_bio_set);
 }
 
+/*
+ * Destructor installed by bio_kmalloc().  A bio allocated with a NULL
+ * bio_set gets both its bio_vec array (kzalloc in bvec_alloc_bs()) and
+ * the bio itself (kmalloc in bio_alloc_bioset()) from the slab allocator
+ * rather than from a mempool, so both are released with kfree().
+ */
+static void bio_kmalloc_destructor(struct bio *bio)
+{
+	kfree(bio->bi_io_vec);
+	kfree(bio);
+}
+
+/*
+ * bio_init - put @bio into its initial state
+ * @bio:	bio to initialise; its previous contents are discarded
+ *
+ * Zeroes the whole structure, then sets the BIO_UPTODATE flag, sets
+ * bi_comp_cpu to -1 and sets the reference count to 1.
+ */
 void bio_init(struct bio *bio)
 {
 	memset(bio, 0, sizeof(*bio));
 	bio->bi_flags = 1 << BIO_UPTODATE;
+	/* NOTE(review): -1 presumably means "no completion CPU chosen" -- confirm */
+	bio->bi_comp_cpu = -1;
 	atomic_set(&bio->bi_cnt, 1);
 }
 
@@ -118,19 +146,25 @@
  * bio_alloc_bioset - allocate a bio for I/O
  * @gfp_mask:   the GFP_ mask given to the slab allocator
  * @nr_iovecs:	number of iovecs to pre-allocate
- * @bs:		the bio_set to allocate from
+ * @bs:		the bio_set to allocate from. If %NULL, just use kmalloc
  *
  * Description:
- *   bio_alloc_bioset will first try it's on mempool to satisfy the allocation.
+ *   bio_alloc_bioset will first try its own mempool to satisfy the allocation.
  *   If %__GFP_WAIT is set then we will block on the internal pool waiting
- *   for a &struct bio to become free.
+ *   for a &struct bio to become free. If a %NULL @bs is passed in, we will
+ *   fall back to just using @kmalloc to allocate the required memory.
  *
  *   allocate bio and iovecs from the memory pools specified by the
- *   bio_set structure.
+ *   bio_set structure, or @kmalloc if none given.
  **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
+	struct bio *bio;
+
+	if (bs)
+		bio = mempool_alloc(bs->bio_pool, gfp_mask);
+	else
+		bio = kmalloc(sizeof(*bio), gfp_mask);
 
 	if (likely(bio)) {
 		struct bio_vec *bvl = NULL;
@@ -141,7 +175,10 @@
 
 			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
 			if (unlikely(!bvl)) {
-				mempool_free(bio, bs->bio_pool);
+				if (bs)
+					mempool_free(bio, bs->bio_pool);
+				else
+					kfree(bio);
 				bio = NULL;
 				goto out;
 			}
@@ -164,6 +201,23 @@
 	return bio;
 }
 
+/*
+ * bio_kmalloc - allocate a bio without mempool backing
+ * @gfp_mask:	allocation flags, passed through to bio_alloc_bioset()
+ * @nr_iovecs:	number of iovecs to pre-allocate
+ *
+ * Like bio_alloc(), but doesn't use a mempool backing. This means that
+ * it CAN fail, but while bio_alloc() can only be used for allocations
+ * that have a short (finite) life span, bio_kmalloc() should be used
+ * for more permanent bio allocations (like allocating some bios for
+ * initialization or setup purposes).
+ *
+ * Returns the new bio, or NULL if bio_alloc_bioset() failed.  On success
+ * the bio's destructor is set to bio_kmalloc_destructor().
+ */
+struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
+{
+	/* a NULL bio_set selects the kmalloc path in bio_alloc_bioset() */
+	struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+
+	if (bio)
+		bio->bi_destructor = bio_kmalloc_destructor;
+
+	return bio;
+}
+
 void zero_fill_bio(struct bio *bio)
 {
 	unsigned long flags;
@@ -208,14 +262,6 @@
 	return bio->bi_phys_segments;
 }
 
-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
-{
-	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
-		blk_recount_segments(q, bio);
-
-	return bio->bi_hw_segments;
-}
-
 /**
  * 	__bio_clone	-	clone a bio
  * 	@bio: destination bio
@@ -350,8 +396,7 @@
 	 */
 
 	while (bio->bi_phys_segments >= q->max_phys_segments
-	       || bio->bi_hw_segments >= q->max_hw_segments
-	       || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
+	       || bio->bi_phys_segments >= q->max_hw_segments) {
 
 		if (retried_segments)
 			return 0;
@@ -395,13 +440,11 @@
 	}
 
 	/* If we may be able to merge these biovecs, force a recount */
-	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
-	    BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
+	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;
-	bio->bi_hw_segments++;
  done:
 	bio->bi_size += len;
 	return len;
@@ -449,16 +492,19 @@
 
 struct bio_map_data {
 	struct bio_vec *iovecs;
-	int nr_sgvecs;
 	struct sg_iovec *sgvecs;
+	int nr_sgvecs;
+	int is_our_pages;
 };
 
 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
-			     struct sg_iovec *iov, int iov_count)
+			     struct sg_iovec *iov, int iov_count,
+			     int is_our_pages)
 {
 	memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
 	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
 	bmd->nr_sgvecs = iov_count;
+	bmd->is_our_pages = is_our_pages;
 	bio->bi_private = bmd;
 }
 
@@ -493,7 +539,8 @@
 }
 
 static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
-			  struct sg_iovec *iov, int iov_count, int uncopy)
+			  struct sg_iovec *iov, int iov_count, int uncopy,
+			  int do_free_page)
 {
 	int ret = 0, i;
 	struct bio_vec *bvec;
@@ -536,7 +583,7 @@
 			}
 		}
 
-		if (uncopy)
+		if (do_free_page)
 			__free_page(bvec->bv_page);
 	}
 
@@ -553,10 +600,11 @@
 int bio_uncopy_user(struct bio *bio)
 {
 	struct bio_map_data *bmd = bio->bi_private;
-	int ret;
+	int ret = 0;
 
-	ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
-
+	if (!bio_flagged(bio, BIO_NULL_MAPPED))
+		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+				     bmd->nr_sgvecs, 1, bmd->is_our_pages);
 	bio_free_map_data(bmd);
 	bio_put(bio);
 	return ret;
@@ -565,16 +613,20 @@
 /**
  *	bio_copy_user_iov	-	copy user data to bio
  *	@q: destination block queue
+ *	@map_data: pointer to the rq_map_data holding pages (if necessary)
  *	@iov:	the iovec.
  *	@iov_count: number of elements in the iovec
  *	@write_to_vm: bool indicating writing to pages or not
+ *	@gfp_mask: memory allocation flags
  *
  *	Prepares and returns a bio for indirect user io, bouncing data
  *	to/from kernel pages as necessary. Must be paired with
  *	call bio_uncopy_user() on io completion.
  */
-struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
-			      int iov_count, int write_to_vm)
+struct bio *bio_copy_user_iov(struct request_queue *q,
+			      struct rq_map_data *map_data,
+			      struct sg_iovec *iov, int iov_count,
+			      int write_to_vm, gfp_t gfp_mask)
 {
 	struct bio_map_data *bmd;
 	struct bio_vec *bvec;
@@ -597,25 +649,38 @@
 		len += iov[i].iov_len;
 	}
 
-	bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
+	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
 	ret = -ENOMEM;
-	bio = bio_alloc(GFP_KERNEL, nr_pages);
+	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
 		goto out_bmd;
 
 	bio->bi_rw |= (!write_to_vm << BIO_RW);
 
 	ret = 0;
+	i = 0;
 	while (len) {
-		unsigned int bytes = PAGE_SIZE;
+		unsigned int bytes;
+
+		if (map_data)
+			bytes = 1U << (PAGE_SHIFT + map_data->page_order);
+		else
+			bytes = PAGE_SIZE;
 
 		if (bytes > len)
 			bytes = len;
 
-		page = alloc_page(q->bounce_gfp | GFP_KERNEL);
+		if (map_data) {
+			if (i == map_data->nr_entries) {
+				ret = -ENOMEM;
+				break;
+			}
+			page = map_data->pages[i++];
+		} else
+			page = alloc_page(q->bounce_gfp | gfp_mask);
 		if (!page) {
 			ret = -ENOMEM;
 			break;
@@ -634,16 +699,17 @@
 	 * success
 	 */
 	if (!write_to_vm) {
-		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
+		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
 		if (ret)
 			goto cleanup;
 	}
 
-	bio_set_map_data(bmd, bio, iov, iov_count);
+	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
 	return bio;
 cleanup:
-	bio_for_each_segment(bvec, bio, i)
-		__free_page(bvec->bv_page);
+	if (!map_data)
+		bio_for_each_segment(bvec, bio, i)
+			__free_page(bvec->bv_page);
 
 	bio_put(bio);
 out_bmd:
@@ -654,29 +720,32 @@
 /**
  *	bio_copy_user	-	copy user data to bio
  *	@q: destination block queue
+ *	@map_data: pointer to the rq_map_data holding pages (if necessary)
  *	@uaddr: start of user address
  *	@len: length in bytes
  *	@write_to_vm: bool indicating writing to pages or not
+ *	@gfp_mask: memory allocation flags
  *
  *	Prepares and returns a bio for indirect user io, bouncing data
  *	to/from kernel pages as necessary. Must be paired with
  *	call bio_uncopy_user() on io completion.
  */
-struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
-			  unsigned int len, int write_to_vm)
+struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
+			  unsigned long uaddr, unsigned int len,
+			  int write_to_vm, gfp_t gfp_mask)
 {
 	struct sg_iovec iov;
 
 	iov.iov_base = (void __user *)uaddr;
 	iov.iov_len = len;
 
-	return bio_copy_user_iov(q, &iov, 1, write_to_vm);
+	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
 }
 
 static struct bio *__bio_map_user_iov(struct request_queue *q,
 				      struct block_device *bdev,
 				      struct sg_iovec *iov, int iov_count,
-				      int write_to_vm)
+				      int write_to_vm, gfp_t gfp_mask)
 {
 	int i, j;
 	int nr_pages = 0;
@@ -702,12 +771,12 @@
 	if (!nr_pages)
 		return ERR_PTR(-EINVAL);
 
-	bio = bio_alloc(GFP_KERNEL, nr_pages);
+	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
 	ret = -ENOMEM;
-	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
 	if (!pages)
 		goto out;
 
@@ -786,19 +855,21 @@
  *	@uaddr: start of user address
  *	@len: length in bytes
  *	@write_to_vm: bool indicating writing to pages or not
+ *	@gfp_mask: memory allocation flags
  *
  *	Map the user space address into a bio suitable for io to a block
  *	device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
-			 unsigned long uaddr, unsigned int len, int write_to_vm)
+			 unsigned long uaddr, unsigned int len, int write_to_vm,
+			 gfp_t gfp_mask)
 {
 	struct sg_iovec iov;
 
 	iov.iov_base = (void __user *)uaddr;
 	iov.iov_len = len;
 
-	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
+	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
 }
 
 /**
@@ -808,18 +879,19 @@
  *	@iov:	the iovec.
  *	@iov_count: number of elements in the iovec
  *	@write_to_vm: bool indicating writing to pages or not
+ *	@gfp_mask: memory allocation flags
  *
  *	Map the user space address into a bio suitable for io to a block
  *	device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
 			     struct sg_iovec *iov, int iov_count,
-			     int write_to_vm)
+			     int write_to_vm, gfp_t gfp_mask)
 {
 	struct bio *bio;
 
-	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
-
+	bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
+				 gfp_mask);
 	if (IS_ERR(bio))
 		return bio;
 
@@ -976,48 +1048,13 @@
 struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 			  gfp_t gfp_mask, int reading)
 {
-	unsigned long kaddr = (unsigned long)data;
-	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned long start = kaddr >> PAGE_SHIFT;
-	const int nr_pages = end - start;
 	struct bio *bio;
 	struct bio_vec *bvec;
-	struct bio_map_data *bmd;
-	int i, ret;
-	struct sg_iovec iov;
+	int i;
 
-	iov.iov_base = data;
-	iov.iov_len = len;
-
-	bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
-	if (!bmd)
-		return ERR_PTR(-ENOMEM);
-
-	ret = -ENOMEM;
-	bio = bio_alloc(gfp_mask, nr_pages);
-	if (!bio)
-		goto out_bmd;
-
-	while (len) {
-		struct page *page;
-		unsigned int bytes = PAGE_SIZE;
-
-		if (bytes > len)
-			bytes = len;
-
-		page = alloc_page(q->bounce_gfp | gfp_mask);
-		if (!page) {
-			ret = -ENOMEM;
-			goto cleanup;
-		}
-
-		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
-			ret = -EINVAL;
-			goto cleanup;
-		}
-
-		len -= bytes;
-	}
+	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
+	if (IS_ERR(bio))
+		return bio;
 
 	if (!reading) {
 		void *p = data;
@@ -1030,20 +1067,9 @@
 		}
 	}
 
-	bio->bi_private = bmd;
 	bio->bi_end_io = bio_copy_kern_endio;
 
-	bio_set_map_data(bmd, bio, &iov, 1);
 	return bio;
-cleanup:
-	bio_for_each_segment(bvec, bio, i)
-		__free_page(bvec->bv_page);
-
-	bio_put(bio);
-out_bmd:
-	bio_free_map_data(bmd);
-
-	return ERR_PTR(ret);
 }
 
 /*
@@ -1230,9 +1256,9 @@
  * split a bio - only worry about a bio with a single page
  * in it's iovec
  */
-struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
+struct bio_pair *bio_split(struct bio *bi, int first_sectors)
 {
-	struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO);
+	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
 
 	if (!bp)
 		return bp;
@@ -1266,7 +1292,7 @@
 	bp->bio2.bi_end_io = bio_pair_end_2;
 
 	bp->bio1.bi_private = bi;
-	bp->bio2.bi_private = pool;
+	bp->bio2.bi_private = bio_split_pool;
 
 	if (bio_integrity(bi))
 		bio_integrity_split(bi, bp, first_sectors);
@@ -1274,6 +1300,42 @@
 	return bp;
 }
 
+/**
+ *      bio_sector_offset - Find hardware sector offset in bio
+ *      @bio:           bio to inspect
+ *      @index:         bio_vec index
+ *      @offset:        offset in bv_page
+ *
+ *      Return the number of hardware sectors between beginning of bio
+ *      and an end point indicated by a bio_vec index and an offset
+ *      within that vector's page.
+ *
+ *      The sector size is taken from queue_hardsect_size() of the queue
+ *      behind @bio->bi_bdev.  Lengths are divided by that size with
+ *      integer division, so a trailing partial sector is not counted.
+ */
+sector_t bio_sector_offset(struct bio *bio, unsigned short index,
+			   unsigned int offset)
+{
+	unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+	struct bio_vec *bv;
+	sector_t sectors;
+	int i;
+
+	sectors = 0;
+
+	/*
+	 * NOTE(review): this replaces @index with the last vector whenever
+	 * @index is at or beyond bi_idx.  Confirm that comparing against
+	 * bi_idx (rather than bi_vcnt) is what was intended.
+	 */
+	if (index >= bio->bi_idx)
+		index = bio->bi_vcnt - 1;
+
+	__bio_for_each_segment(bv, bio, i, 0) {
+		if (i == index) {
+			/* only the part of the end vector before @offset counts */
+			if (offset > bv->bv_offset)
+				sectors += (offset - bv->bv_offset) / sector_sz;
+			break;
+		}
+
+		/* vectors before the end point contribute their full length */
+		sectors += bv->bv_len / sector_sz;
+	}
+
+	return sectors;
+}
+EXPORT_SYMBOL(bio_sector_offset);
 
 /*
  * create memory pools for biovec's in a bio_set.
@@ -1376,6 +1438,7 @@
 subsys_initcall(init_bio);
 
 EXPORT_SYMBOL(bio_alloc);
+EXPORT_SYMBOL(bio_kmalloc);
 EXPORT_SYMBOL(bio_put);
 EXPORT_SYMBOL(bio_free);
 EXPORT_SYMBOL(bio_endio);
@@ -1383,7 +1446,6 @@
 EXPORT_SYMBOL(__bio_clone);
 EXPORT_SYMBOL(bio_clone);
 EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
@@ -1393,7 +1455,6 @@
 EXPORT_SYMBOL(bio_copy_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
-EXPORT_SYMBOL(bio_split_pool);
 EXPORT_SYMBOL(bio_copy_user);
 EXPORT_SYMBOL(bio_uncopy_user);
 EXPORT_SYMBOL(bioset_create);

diff --git a/fs/block_dev.c b/fs/block_dev.c
index aff5421..d84f0469 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c

@@ -540,22 +540,6 @@
  *           /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
  */
 
-static struct kobject *bdev_get_kobj(struct block_device *bdev)
-{
-	if (bdev->bd_contains != bdev)
-		return kobject_get(&bdev->bd_part->dev.kobj);
-	else
-		return kobject_get(&bdev->bd_disk->dev.kobj);
-}
-
-static struct kobject *bdev_get_holder(struct block_device *bdev)
-{
-	if (bdev->bd_contains != bdev)
-		return kobject_get(bdev->bd_part->holder_dir);
-	else
-		return kobject_get(bdev->bd_disk->holder_dir);
-}
-
 static int add_symlink(struct kobject *from, struct kobject *to)
 {
 	if (!from || !to)
@@ -604,11 +588,11 @@
 	if (!bo->hdev)
 		goto fail_put_sdir;
 
-	bo->sdev = bdev_get_kobj(bdev);
+	bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
 	if (!bo->sdev)
 		goto fail_put_hdev;
 
-	bo->hdir = bdev_get_holder(bdev);
+	bo->hdir = kobject_get(bdev->bd_part->holder_dir);
 	if (!bo->hdir)
 		goto fail_put_sdev;
 
@@ -868,6 +852,87 @@
 
 EXPORT_SYMBOL(open_by_devnum);
 
+/**
+ * flush_disk - invalidates all buffer-cache entries on a disk
+ *
+ * @bdev:      struct block device to be flushed
+ *
+ * Invalidates all buffer-cache entries on a disk. It should be called
+ * when a disk has been changed -- either by a media change or online
+ * resize.
+ *
+ * A warning is logged when __invalidate_device() returns non-zero.  If
+ * @bdev has a disk attached and that disk is partitionable,
+ * @bdev->bd_invalidated is set; do_open() rescans the partitions when it
+ * finds that flag set.
+ */
+static void flush_disk(struct block_device *bdev)
+{
+	if (__invalidate_device(bdev)) {
+		/* stays empty in the message when no disk is attached */
+		char name[BDEVNAME_SIZE] = "";
+
+		if (bdev->bd_disk)
+			disk_name(bdev->bd_disk, 0, name);
+		printk(KERN_WARNING "VFS: busy inodes on changed media or "
+		       "resized disk %s\n", name);
+	}
+
+	if (!bdev->bd_disk)
+		return;
+	if (disk_partitionable(bdev->bd_disk))
+		bdev->bd_invalidated = 1;
+}
+
+/**
+ * check_disk_size_change - checks for disk size change and adjusts bdev size.
+ * @disk: struct gendisk to check
+ * @bdev: struct bdev to adjust.
+ *
+ * This routine checks to see if the bdev size does not match the disk size
+ * and adjusts it if it differs.
+ *
+ * Both sizes are compared in bytes: the disk capacity is shifted left by 9
+ * (multiplied by 512) before it is compared with the bdev inode's i_size.
+ * On a mismatch the change is logged, the inode size is updated and
+ * flush_disk() is called on @bdev.
+ *
+ * NOTE(review): revalidate_disk() takes bdev->bd_mutex around this call;
+ * presumably other callers are expected to hold it as well -- confirm.
+ */
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
+{
+	loff_t disk_size, bdev_size;
+
+	disk_size = (loff_t)get_capacity(disk) << 9;
+	bdev_size = i_size_read(bdev->bd_inode);
+	if (disk_size != bdev_size) {
+		char name[BDEVNAME_SIZE];
+
+		disk_name(disk, 0, name);
+		printk(KERN_INFO
+		       "%s: detected capacity change from %lld to %lld\n",
+		       name, bdev_size, disk_size);
+		i_size_write(bdev->bd_inode, disk_size);
+		flush_disk(bdev);
+	}
+}
+EXPORT_SYMBOL(check_disk_size_change);
+
+/**
+ * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
+ * @disk: struct gendisk to be revalidated
+ *
+ * This routine is a wrapper for lower-level driver's revalidate_disk
+ * call-backs.  It is used to do common pre and post operations needed
+ * for all revalidate_disk operations.
+ *
+ * After the driver call-back (if any) has run, the size of the block
+ * device obtained with bdget_disk(disk, 0) is checked against the disk
+ * capacity under bd_mutex.  That check is skipped when bdget_disk()
+ * returns NULL.
+ *
+ * Returns the value returned by the driver's call-back, or 0 when the
+ * driver does not provide one.
+ */
+int revalidate_disk(struct gendisk *disk)
+{
+	struct block_device *bdev;
+	int ret = 0;
+
+	if (disk->fops->revalidate_disk)
+		ret = disk->fops->revalidate_disk(disk);
+
+	bdev = bdget_disk(disk, 0);
+	if (!bdev)
+		return ret;
+
+	mutex_lock(&bdev->bd_mutex);
+	check_disk_size_change(disk, bdev);
+	mutex_unlock(&bdev->bd_mutex);
+	/* drop the reference taken by bdget_disk() */
+	bdput(bdev);
+	return ret;
+}
+EXPORT_SYMBOL(revalidate_disk);
+
 /*
  * This routine checks whether a removable media has been changed,
  * and invalidates all buffer-cache-entries in that case. This
@@ -887,13 +952,9 @@
 	if (!bdops->media_changed(bdev->bd_disk))
 		return 0;
 
-	if (__invalidate_device(bdev))
-		printk("VFS: busy inodes on changed media.\n");
-
+	flush_disk(bdev);
 	if (bdops->revalidate_disk)
 		bdops->revalidate_disk(bdev->bd_disk);
-	if (bdev->bd_disk->minors > 1)
-		bdev->bd_invalidated = 1;
 	return 1;
 }
 
@@ -927,10 +988,10 @@
 
 static int do_open(struct block_device *bdev, struct file *file, int for_part)
 {
-	struct module *owner = NULL;
 	struct gendisk *disk;
+	struct hd_struct *part = NULL;
 	int ret;
-	int part;
+	int partno;
 	int perm = 0;
 
 	if (file->f_mode & FMODE_READ)
@@ -948,25 +1009,27 @@
 
 	ret = -ENXIO;
 	file->f_mapping = bdev->bd_inode->i_mapping;
+
 	lock_kernel();
-	disk = get_gendisk(bdev->bd_dev, &part);
-	if (!disk) {
-		unlock_kernel();
-		bdput(bdev);
-		return ret;
-	}
-	owner = disk->fops->owner;
+
+	disk = get_gendisk(bdev->bd_dev, &partno);
+	if (!disk)
+		goto out_unlock_kernel;
+	part = disk_get_part(disk, partno);
+	if (!part)
+		goto out_unlock_kernel;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
+		bdev->bd_part = part;
 		bdev->bd_contains = bdev;
-		if (!part) {
+		if (!partno) {
 			struct backing_dev_info *bdi;
 			if (disk->fops->open) {
 				ret = disk->fops->open(bdev->bd_inode, file);
 				if (ret)
-					goto out_first;
+					goto out_clear;
 			}
 			if (!bdev->bd_openers) {
 				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
@@ -978,36 +1041,36 @@
 			if (bdev->bd_invalidated)
 				rescan_partitions(disk, bdev);
 		} else {
-			struct hd_struct *p;
 			struct block_device *whole;
 			whole = bdget_disk(disk, 0);
 			ret = -ENOMEM;
 			if (!whole)
-				goto out_first;
+				goto out_clear;
 			BUG_ON(for_part);
 			ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1);
 			if (ret)
-				goto out_first;
+				goto out_clear;
 			bdev->bd_contains = whole;
-			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
 			   whole->bd_inode->i_data.backing_dev_info;
-			if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
+			if (!(disk->flags & GENHD_FL_UP) ||
+			    !part || !part->nr_sects) {
 				ret = -ENXIO;
-				goto out_first;
+				goto out_clear;
 			}
-			kobject_get(&p->dev.kobj);
-			bdev->bd_part = p;
-			bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+			bd_set_size(bdev, (loff_t)part->nr_sects << 9);
 		}
 	} else {
+		disk_put_part(part);
 		put_disk(disk);
-		module_put(owner);
+		module_put(disk->fops->owner);
+		part = NULL;
+		disk = NULL;
 		if (bdev->bd_contains == bdev) {
 			if (bdev->bd_disk->fops->open) {
 				ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
 				if (ret)
-					goto out;
+					goto out_unlock_bdev;
 			}
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
@@ -1020,19 +1083,24 @@
 	unlock_kernel();
 	return 0;
 
-out_first:
+ out_clear:
 	bdev->bd_disk = NULL;
+	bdev->bd_part = NULL;
 	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 	if (bdev != bdev->bd_contains)
 		__blkdev_put(bdev->bd_contains, 1);
 	bdev->bd_contains = NULL;
-	put_disk(disk);
-	module_put(owner);
-out:
+ out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
+ out_unlock_kernel:
 	unlock_kernel();
-	if (ret)
-		bdput(bdev);
+
+	disk_put_part(part);
+	if (disk)
+		module_put(disk->fops->owner);
+	put_disk(disk);
+	bdput(bdev);
+
 	return ret;
 }
 
@@ -1117,11 +1185,8 @@
 
 		put_disk(disk);
 		module_put(owner);
-
-		if (bdev->bd_contains != bdev) {
-			kobject_put(&bdev->bd_part->dev.kobj);
-			bdev->bd_part = NULL;
-		}
+		disk_put_part(bdev->bd_part);
+		bdev->bd_part = NULL;
 		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 		if (bdev != bdev->bd_contains)
@@ -1197,10 +1262,9 @@
 
 /**
  * lookup_bdev  - lookup a struct block_device by name
+ * @pathname:	special file representing the block device
  *
- * @path:	special file representing the block device
- *
- * Get a reference to the blockdevice at @path in the current
+ * Get a reference to the blockdevice at @pathname in the current
  * namespace if possible and return it.  Return ERR_PTR(error)
  * otherwise.
  */

diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 302e95c..fb98b3d 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c

@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/msdos_fs.h>
+#include <linux/blkdev.h>
 
 struct fatent_operations {
 	void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -535,6 +536,7 @@
 	struct fat_entry fatent;
 	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
 	int i, err, nr_bhs;
+	int first_cl = cluster;
 
 	nr_bhs = 0;
 	fatent_init(&fatent);
@@ -551,6 +553,18 @@
 			goto error;
 		}
 
+		/* 
+		 * Issue discard for the sectors we no longer care about,
+		 * batching contiguous clusters into one request
+		 */
+		if (cluster != fatent.entry + 1) {
+			int nr_clus = fatent.entry - first_cl + 1;
+
+			sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl),
+					 nr_clus * sbi->sec_per_clus);
+			first_cl = cluster;
+		}
+
 		ops->ent_put(&fatent, FAT_ENT_FREE);
 		if (sbi->free_clusters != -1) {
 			sbi->free_clusters++;

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ecc3330..7408227 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c

@@ -120,22 +120,21 @@
  * a pointer to that same buffer (for convenience).
  */
 
-char *disk_name(struct gendisk *hd, int part, char *buf)
+char *disk_name(struct gendisk *hd, int partno, char *buf)
 {
-	if (!part)
+	if (!partno)
 		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
 	else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
-		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part);
+		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
 	else
-		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part);
+		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
 
 	return buf;
 }
 
 const char *bdevname(struct block_device *bdev, char *buf)
 {
-	int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor;
-	return disk_name(bdev->bd_disk, part, buf);
+	return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
 }
 
 EXPORT_SYMBOL(bdevname);
@@ -169,7 +168,7 @@
 	if (isdigit(state->name[strlen(state->name)-1]))
 		sprintf(state->name, "p");
 
-	state->limit = hd->minors;
+	state->limit = disk_max_parts(hd);
 	i = res = err = 0;
 	while (!res && check_part[i]) {
 		memset(&state->parts, 0, sizeof(state->parts));
@@ -204,21 +203,22 @@
 	return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
 }
 
-static ssize_t part_size_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+ssize_t part_size_show(struct device *dev,
+		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
 	return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
 }
 
-static ssize_t part_stat_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+ssize_t part_stat_show(struct device *dev,
+		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	int cpu;
 
-	preempt_disable();
-	part_round_stats(p);
-	preempt_enable();
+	cpu = part_stat_lock();
+	part_round_stats(cpu, p);
+	part_stat_unlock();
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
@@ -238,17 +238,17 @@
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
-static ssize_t part_fail_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+ssize_t part_fail_show(struct device *dev,
+		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
 
 	return sprintf(buf, "%d\n", p->make_it_fail);
 }
 
-static ssize_t part_fail_store(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t count)
+ssize_t part_fail_store(struct device *dev,
+			struct device_attribute *attr,
+			const char *buf, size_t count)
 {
 	struct hd_struct *p = dev_to_part(dev);
 	int i;
@@ -300,40 +300,34 @@
 	.release	= part_release,
 };
 
-static inline void partition_sysfs_add_subdir(struct hd_struct *p)
+static void delete_partition_rcu_cb(struct rcu_head *head)
 {
-	struct kobject *k;
+	struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
 
-	k = kobject_get(&p->dev.kobj);
-	p->holder_dir = kobject_create_and_add("holders", k);
-	kobject_put(k);
+	part->start_sect = 0;
+	part->nr_sects = 0;
+	part_stat_set_all(part, 0);
+	put_device(part_to_dev(part));
 }
 
-static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
+void delete_partition(struct gendisk *disk, int partno)
 {
-	struct kobject *k;
+	struct disk_part_tbl *ptbl = disk->part_tbl;
+	struct hd_struct *part;
 
-	k = kobject_get(&disk->dev.kobj);
-	disk->holder_dir = kobject_create_and_add("holders", k);
-	disk->slave_dir = kobject_create_and_add("slaves", k);
-	kobject_put(k);
-}
-
-void delete_partition(struct gendisk *disk, int part)
-{
-	struct hd_struct *p = disk->part[part-1];
-
-	if (!p)
+	if (partno >= ptbl->len)
 		return;
-	if (!p->nr_sects)
+
+	part = ptbl->part[partno];
+	if (!part)
 		return;
-	disk->part[part-1] = NULL;
-	p->start_sect = 0;
-	p->nr_sects = 0;
-	part_stat_set_all(p, 0);
-	kobject_put(p->holder_dir);
-	device_del(&p->dev);
-	put_device(&p->dev);
+
+	blk_free_devt(part_devt(part));
+	rcu_assign_pointer(ptbl->part[partno], NULL);
+	kobject_put(part->holder_dir);
+	device_del(part_to_dev(part));
+
+	call_rcu(&part->rcu_head, delete_partition_rcu_cb);
 }
 
 static ssize_t whole_disk_show(struct device *dev,
@@ -344,102 +338,132 @@
 static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
 		   whole_disk_show, NULL);
 
+/*
+ * add_partition - create partition @partno on @disk and register its device
+ * @disk:	disk the partition belongs to
+ * @partno:	partition number; also the index into disk->part_tbl
+ * @start:	stored as the partition's start_sect
+ * @len:	stored as the partition's nr_sects
+ * @flags:	if ADDPART_FLAG_WHOLEDISK is set, a "whole_disk" attribute
+ *		file is created for the partition device
+ *
+ * The partition is published in disk->part_tbl with rcu_assign_pointer()
+ * only after its device, "holders" directory and optional attribute file
+ * have been set up.
+ *
+ * Returns 0 on success, -EBUSY if the table slot is already occupied,
+ * -ENOMEM on allocation failure, or the error returned by
+ * disk_expand_part_tbl(), blk_alloc_devt(), device_add() or
+ * device_create_file().
+ */
-int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int partno,
+		  sector_t start, sector_t len, int flags)
 {
 	struct hd_struct *p;
+	dev_t devt = MKDEV(0, 0);
+	struct device *ddev = disk_to_dev(disk);
+	struct device *pdev;
+	struct disk_part_tbl *ptbl;
+	const char *dname;
 	int err;
 
+	err = disk_expand_part_tbl(disk, partno);
+	if (err)
+		return err;
+	ptbl = disk->part_tbl;
+
+	if (ptbl->part[partno])
+		return -EBUSY;
+
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
 	if (!init_part_stats(p)) {
 		err = -ENOMEM;
-		goto out0;
+		goto out_free;
 	}
+	pdev = part_to_dev(p);
+
 	p->start_sect = start;
 	p->nr_sects = len;
-	p->partno = part;
-	p->policy = disk->policy;
+	p->partno = partno;
+	p->policy = get_disk_ro(disk);
 
-	if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1]))
-		snprintf(p->dev.bus_id, BUS_ID_SIZE,
-		"%sp%d", disk->dev.bus_id, part);
+	dname = dev_name(ddev);
+	if (isdigit(dname[strlen(dname) - 1]))
+		snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno);
 	else
-		snprintf(p->dev.bus_id, BUS_ID_SIZE,
-			 "%s%d", disk->dev.bus_id, part);
+		snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno);
 
-	device_initialize(&p->dev);
-	p->dev.devt = MKDEV(disk->major, disk->first_minor + part);
-	p->dev.class = &block_class;
-	p->dev.type = &part_type;
-	p->dev.parent = &disk->dev;
-	disk->part[part-1] = p;
+	device_initialize(pdev);
+	pdev->class = &block_class;
+	pdev->type = &part_type;
+	pdev->parent = ddev;
+
+	/*
+	 * The stats allocated by init_part_stats() exist at this point, so
+	 * a failure here must release them before freeing p.
+	 */
+	err = blk_alloc_devt(p, &devt);
+	if (err)
+		goto out_free_stats;
+	pdev->devt = devt;
 
 	/* delay uevent until 'holders' subdir is created */
-	p->dev.uevent_suppress = 1;
-	err = device_add(&p->dev);
+	pdev->uevent_suppress = 1;
+	err = device_add(pdev);
 	if (err)
-		goto out1;
-	partition_sysfs_add_subdir(p);
-	p->dev.uevent_suppress = 0;
+		goto out_put;
+
+	err = -ENOMEM;
+	p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
+	if (!p->holder_dir)
+		goto out_del;
+
+	pdev->uevent_suppress = 0;
 	if (flags & ADDPART_FLAG_WHOLEDISK) {
-		err = device_create_file(&p->dev, &dev_attr_whole_disk);
+		err = device_create_file(pdev, &dev_attr_whole_disk);
 		if (err)
-			goto out2;
+			goto out_del;
 	}
 
+	/* everything is up and running, commence */
+	INIT_RCU_HEAD(&p->rcu_head);
+	rcu_assign_pointer(ptbl->part[partno], p);
+
 	/* suppress uevent if the disk supresses it */
-	if (!disk->dev.uevent_suppress)
-		kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+	if (!ddev->uevent_suppress)
+		kobject_uevent(&pdev->kobj, KOBJ_ADD);
 
 	return 0;
 
-out2:
-	device_del(&p->dev);
-out1:
-	put_device(&p->dev);
-	free_part_stats(p);
-out0:
+out_free_stats:
+	free_part_stats(p);
+out_free:
 	kfree(p);
 	return err;
+	/*
+	 * From here on p is released through put_device() instead of kfree().
+	 * NOTE(review): presumably the device type's release callback frees
+	 * the stats and p -- confirm against part_release().
+	 */
+out_del:
+	kobject_put(p->holder_dir);
+	device_del(pdev);
+out_put:
+	put_device(pdev);
+	blk_free_devt(devt);
+	return err;
 }
 
 /* Not exported, helper to add_disk(). */
 void register_disk(struct gendisk *disk)
 {
+	struct device *ddev = disk_to_dev(disk);
 	struct block_device *bdev;
+	struct disk_part_iter piter;
+	struct hd_struct *part;
 	char *s;
-	int i;
-	struct hd_struct *p;
 	int err;
 
-	disk->dev.parent = disk->driverfs_dev;
-	disk->dev.devt = MKDEV(disk->major, disk->first_minor);
+	ddev->parent = disk->driverfs_dev;
 
-	strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
+	strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE);
 	/* ewww... some of these buggers have / in the name... */
-	s = strchr(disk->dev.bus_id, '/');
+	s = strchr(ddev->bus_id, '/');
 	if (s)
 		*s = '!';
 
 	/* delay uevents, until we scanned partition table */
-	disk->dev.uevent_suppress = 1;
+	ddev->uevent_suppress = 1;
 
-	if (device_add(&disk->dev))
+	if (device_add(ddev))
 		return;
 #ifndef CONFIG_SYSFS_DEPRECATED
-	err = sysfs_create_link(block_depr, &disk->dev.kobj,
-				kobject_name(&disk->dev.kobj));
+	err = sysfs_create_link(block_depr, &ddev->kobj,
+				kobject_name(&ddev->kobj));
 	if (err) {
-		device_del(&disk->dev);
+		device_del(ddev);
 		return;
 	}
 #endif
-	disk_sysfs_add_subdirs(disk);
+	disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
+	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 
 	/* No minors to use for partitions */
-	if (disk->minors == 1)
+	if (!disk_partitionable(disk))
 		goto exit;
 
 	/* No such device (e.g., media were just removed) */
@@ -458,41 +482,57 @@
 
 exit:
 	/* announce disk after possible partitions are created */
-	disk->dev.uevent_suppress = 0;
-	kobject_uevent(&disk->dev.kobj, KOBJ_ADD);
+	ddev->uevent_suppress = 0;
+	kobject_uevent(&ddev->kobj, KOBJ_ADD);
 
 	/* announce possible partitions */
-	for (i = 1; i < disk->minors; i++) {
-		p = disk->part[i-1];
-		if (!p || !p->nr_sects)
-			continue;
-		kobject_uevent(&p->dev.kobj, KOBJ_ADD);
-	}
+	disk_part_iter_init(&piter, disk, 0);
+	while ((part = disk_part_iter_next(&piter)))
+		kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
+	disk_part_iter_exit(&piter);
 }
 
 int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 {
+	struct disk_part_iter piter;
+	struct hd_struct *part;
 	struct parsed_partitions *state;
-	int p, res;
+	int p, highest, res;
 
 	if (bdev->bd_part_count)
 		return -EBUSY;
 	res = invalidate_partition(disk, 0);
 	if (res)
 		return res;
-	bdev->bd_invalidated = 0;
-	for (p = 1; p < disk->minors; p++)
-		delete_partition(disk, p);
+
+	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
+	while ((part = disk_part_iter_next(&piter)))
+		delete_partition(disk, part->partno);
+	disk_part_iter_exit(&piter);
+
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
+	check_disk_size_change(disk, bdev);
+	bdev->bd_invalidated = 0;
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
 	if (IS_ERR(state))	/* I/O error reading the partition table */
 		return -EIO;
 
 	/* tell userspace that the media / partition table may have changed */
-	kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE);
+	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 
+	/* Detect the highest partition number and preallocate
+	 * disk->part_tbl.  This is an optimization and not strictly
+	 * necessary.
+	 */
+	for (p = 1, highest = 0; p < state->limit; p++)
+		if (state->parts[p].size)
+			highest = p;
+
+	disk_expand_part_tbl(disk, highest);
+
+	/* add partitions */
 	for (p = 1; p < state->limit; p++) {
 		sector_t size = state->parts[p].size;
 		sector_t from = state->parts[p].from;
@@ -541,25 +581,31 @@
 
 void del_gendisk(struct gendisk *disk)
 {
-	int p;
+	struct disk_part_iter piter;
+	struct hd_struct *part;
 
 	/* invalidate stuff */
-	for (p = disk->minors - 1; p > 0; p--) {
-		invalidate_partition(disk, p);
-		delete_partition(disk, p);
+	disk_part_iter_init(&piter, disk,
+			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
+	while ((part = disk_part_iter_next(&piter))) {
+		invalidate_partition(disk, part->partno);
+		delete_partition(disk, part->partno);
 	}
+	disk_part_iter_exit(&piter);
+
 	invalidate_partition(disk, 0);
-	disk->capacity = 0;
+	blk_free_devt(disk_to_dev(disk)->devt);
+	set_capacity(disk, 0);
 	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
-	disk_stat_set_all(disk, 0);
-	disk->stamp = 0;
+	part_stat_set_all(&disk->part0, 0);
+	disk->part0.stamp = 0;
 
-	kobject_put(disk->holder_dir);
+	kobject_put(disk->part0.holder_dir);
 	kobject_put(disk->slave_dir);
 	disk->driverfs_dev = NULL;
 #ifndef CONFIG_SYSFS_DEPRECATED
-	sysfs_remove_link(block_depr, disk->dev.bus_id);
+	sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 #endif
-	device_del(&disk->dev);
+	device_del(disk_to_dev(disk));
 }

diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 17ae8ec..98dbe1a 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h

@@ -5,15 +5,13 @@
  * add_gd_partition adds a partitions details to the devices partition
  * description.
  */
-enum { MAX_PART = 256 };
-
 struct parsed_partitions {
 	char name[BDEVNAME_SIZE];
 	struct {
 		sector_t from;
 		sector_t size;
 		int flags;
-	} parts[MAX_PART];
+	} parts[DISK_MAX_PARTS];
 	int next;
 	int limit;
 };

diff --git a/fs/splice.c b/fs/splice.c
index 1bbc6f4..a1e701c 100644
--- a/fs/splice.c
+++ b/fs/splice.c

@@ -898,6 +898,9 @@
 	if (unlikely(!(out->f_mode & FMODE_WRITE)))
 		return -EBADF;
 
+	if (unlikely(out->f_flags & O_APPEND))
+		return -EINVAL;
+
 	ret = rw_verify_area(WRITE, out, ppos, len);
 	if (unlikely(ret < 0))
 		return ret;

diff --git a/include/asm-mips/cevt-r4k.h b/include/asm-mips/cevt-r4k.h
new file mode 100644
index 0000000..fa4328f
--- /dev/null
+++ b/include/asm-mips/cevt-r4k.h

@@ -0,0 +1,46 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2008 Kevin D. Kissell
+ */
+
+/*
+ * Definitions used for common event timer implementation
+ * for MIPS 4K-type processors and their MIPS MT variants.
+ * Avoids unsightly extern declarations in C files.
+ */
+#ifndef __ASM_CEVT_R4K_H
+#define __ASM_CEVT_R4K_H
+
+DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
+
+void mips_event_handler(struct clock_event_device *dev);
+int c0_compare_int_usable(void);
+void mips_set_clock_mode(enum clock_event_mode, struct clock_event_device *);
+irqreturn_t c0_compare_interrupt(int, void *);
+
+extern struct irqaction c0_compare_irqaction;
+extern int cp0_timer_irq_installed;
+
+/*
+ * Possibly handle a performance counter interrupt.
+ * Return true if the timer interrupt should not be checked
+ */
+
+static inline int handle_perf_irq(int r2)
+{
+	/*
+	 * The performance counter overflow interrupt may be shared with the
+	 * timer interrupt (cp0_perfcount_irq < 0). If it is and a
+	 * performance counter has overflowed (perf_irq() == IRQ_HANDLED)
+	 * and we can't reliably determine if a counter interrupt has also
+	 * happened (!r2) then don't check for a timer interrupt.
+	 */
+	return (cp0_perfcount_irq < 0) &&
+		perf_irq() == IRQ_HANDLED &&
+		!r2;
+}
+
+#endif /* __ASM_CEVT_R4K_H */

diff --git a/include/asm-mips/irqflags.h b/include/asm-mips/irqflags.h
index 881e886..701ec0b 100644
--- a/include/asm-mips/irqflags.h
+++ b/include/asm-mips/irqflags.h

@@ -38,8 +38,17 @@
 	"	.set	pop						\n"
 	"	.endm");
 
+extern void smtc_ipi_replay(void);
+
 static inline void raw_local_irq_enable(void)
 {
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC kernel needs to do a software replay of queued
+	 * IPIs, at the cost of call overhead on each local_irq_enable()
+	 */
+	smtc_ipi_replay();
+#endif
 	__asm__ __volatile__(
 		"raw_local_irq_enable"
 		: /* no outputs */
@@ -47,6 +56,7 @@
 		: "memory");
 }
 
+
 /*
  * For cli() we have to insert nops to make sure that the new value
  * has actually arrived in the status register before the end of this
@@ -185,15 +195,14 @@
 	"	.set	pop						\n"
 	"	.endm							\n");
 
-extern void smtc_ipi_replay(void);
 
 static inline void raw_local_irq_restore(unsigned long flags)
 {
 	unsigned long __tmp1;
 
-#ifdef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY
+#ifdef CONFIG_MIPS_MT_SMTC
 	/*
-	 * CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY does prompt replay of deferred
+	 * SMTC kernel needs to do a software replay of queued
 	 * IPIs, at the cost of branch and call overhead on each
 	 * local_irq_restore()
 	 */
@@ -208,6 +217,17 @@
 		: "memory");
 }
 
+static inline void __raw_local_irq_restore(unsigned long flags)
+{
+	unsigned long __tmp1;
+
+	__asm__ __volatile__(
+		"raw_local_irq_restore\t%0"
+		: "=r" (__tmp1)
+		: "0" (flags)
+		: "memory");
+}
+
 static inline int raw_irqs_disabled_flags(unsigned long flags)
 {
 #ifdef CONFIG_MIPS_MT_SMTC

diff --git a/include/asm-mips/mipsregs.h b/include/asm-mips/mipsregs.h
index a46f8e2..9798660 100644
--- a/include/asm-mips/mipsregs.h
+++ b/include/asm-mips/mipsregs.h

@@ -1462,7 +1462,7 @@
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 	omt = __dmt();						\
@@ -1480,7 +1480,7 @@
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 	omt = __dmt();						\
@@ -1498,7 +1498,7 @@
 {								\
 	unsigned int res;					\
 	unsigned int omt;					\
-	unsigned int flags;					\
+	unsigned long flags;					\
 								\
 	local_irq_save(flags);					\
 								\

diff --git a/include/asm-mips/smtc.h b/include/asm-mips/smtc.h
index 3639b28..ea60bf0 100644
--- a/include/asm-mips/smtc.h
+++ b/include/asm-mips/smtc.h

@@ -6,6 +6,7 @@
  */
 
 #include <asm/mips_mt.h>
+#include <asm/smtc_ipi.h>
 
 /*
  * System-wide SMTC status information
@@ -38,14 +39,15 @@
 struct task_struct;
 
 void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu);
-
+void self_ipi(struct smtc_ipi *);
 void smtc_flush_tlb_asid(unsigned long asid);
-extern int mipsmt_build_cpu_map(int startslot);
-extern void mipsmt_prepare_cpus(void);
+extern int smtc_build_cpu_map(int startslot);
+extern void smtc_prepare_cpus(int cpus);
 extern void smtc_smp_finish(void);
 extern void smtc_boot_secondary(int cpu, struct task_struct *t);
 extern void smtc_cpus_done(void);
 
+
 /*
  * Sharing the TLB between multiple VPEs means that the
  * "random" index selection function is not allowed to

diff --git a/include/asm-mips/sn/mapped_kernel.h b/include/asm-mips/sn/mapped_kernel.h
index c3dd5d0..721496a 100644
--- a/include/asm-mips/sn/mapped_kernel.h
+++ b/include/asm-mips/sn/mapped_kernel.h

@@ -5,6 +5,8 @@
 #ifndef __ASM_SN_MAPPED_KERNEL_H
 #define __ASM_SN_MAPPED_KERNEL_H
 
+#include <linux/mmzone.h>
+
 /*
  * Note on how mapped kernels work: the text and data section is
  * compiled at cksseg segment (LOADADDR = 0xc001c000), and the
@@ -29,10 +31,8 @@
 #define MAPPED_ADDR_RO_TO_PHYS(x)	(x - REP_BASE)
 #define MAPPED_ADDR_RW_TO_PHYS(x)	(x - REP_BASE - 16777216)
 
-#define MAPPED_KERN_RO_PHYSBASE(n) \
-			(PLAT_NODE_DATA(n)->kern_vars.kv_ro_baseaddr)
-#define MAPPED_KERN_RW_PHYSBASE(n) \
-			(PLAT_NODE_DATA(n)->kern_vars.kv_rw_baseaddr)
+#define MAPPED_KERN_RO_PHYSBASE(n) (hub_data(n)->kern_vars.kv_ro_baseaddr)
+#define MAPPED_KERN_RW_PHYSBASE(n) (hub_data(n)->kern_vars.kv_rw_baseaddr)
 
 #define MAPPED_KERN_RO_TO_PHYS(x) \
 				((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \

diff --git a/include/asm-mips/stackframe.h b/include/asm-mips/stackframe.h
index 051e1af..4c37c4e5 100644
--- a/include/asm-mips/stackframe.h
+++ b/include/asm-mips/stackframe.h

@@ -297,14 +297,31 @@
 #ifdef CONFIG_MIPS_MT_SMTC
 		.set	mips32r2
 		/*
-		 * This may not really be necessary if ints are already
-		 * inhibited here.
+		 * We need to make sure the read-modify-write
+		 * of Status below isn't perturbed by an interrupt
+		 * or cross-TC access, so we need to do at least a DMT,
+		 * protected by an interrupt-inhibit. But setting IXMT
+		 * also creates a few-cycle window where an IPI could
+		 * be queued and not be detected before potentially
+		 * returning to a WAIT or user-mode loop. It must be
+		 * replayed.
+		 *
+		 * We're in the middle of a context switch, and
+		 * we can't dispatch it directly without trashing
+		 * some registers, so we'll try to detect this unlikely
+		 * case and program a software interrupt in the VPE,
+		 * as would be done for a cross-VPE IPI.  To accommodate
+		 * the handling of that case, we're doing a DVPE instead
+		 * of just a DMT here to protect against other threads.
+		 * This is a lot of cruft to cover a tiny window.
+		 * If you can find a better design, implement it!
+		 *
 		 */
 		mfc0	v0, CP0_TCSTATUS
 		ori	v0, TCSTATUS_IXMT
 		mtc0	v0, CP0_TCSTATUS
 		_ehb
-		DMT	5				# dmt a1
+		DVPE	5				# dvpe a1
 		jal	mips_ihb
 #endif /* CONFIG_MIPS_MT_SMTC */
 		mfc0	a0, CP0_STATUS
@@ -325,17 +342,50 @@
  */
 		LONG_L	v1, PT_TCSTATUS(sp)
 		_ehb
-		mfc0	v0, CP0_TCSTATUS
+		mfc0	a0, CP0_TCSTATUS
 		andi	v1, TCSTATUS_IXMT
-		/* We know that TCStatua.IXMT should be set from above */
-		xori	v0, v0, TCSTATUS_IXMT
-		or	v0, v0, v1
-		mtc0	v0, CP0_TCSTATUS
-		_ehb
-		andi	a1, a1, VPECONTROL_TE
+		bnez	v1, 0f
+
+/*
+ * We'd like to detect any IPIs queued in the tiny window
+ * above and request a software interrupt to service them
+ * when we ERET.
+ *
+ * Computing the offset into the IPIQ array of the executing
+ * TC's IPI queue in-line would be tedious.  We use part of
+ * the TCContext register to hold 16 bits of offset that we
+ * can add in-line to find the queue head.
+ */
+		mfc0	v0, CP0_TCCONTEXT
+		la	a2, IPIQ
+		srl	v0, v0, 16
+		addu	a2, a2, v0
+		LONG_L	v0, 0(a2)
+		beqz	v0, 0f
+/*
+ * If we have a queue, provoke dispatch within the VPE by setting C_SW1
+ */
+		mfc0	v0, CP0_CAUSE
+		ori	v0, v0, C_SW1
+		mtc0	v0, CP0_CAUSE
+0:
+		/*
+		 * This test should really never branch but
+		 * let's be prudent here.  Having atomized
+		 * the shared register modifications, we can
+		 * now EVPE, and must do so before interrupts
+		 * are potentially re-enabled.
+		 */
+		andi	a1, a1, MVPCONTROL_EVP
 		beqz	a1, 1f
-		emt
+		evpe
 1:
+		/* We know that TCStatus.IXMT should be set from above */
+		xori	a0, a0, TCSTATUS_IXMT
+		or	a0, a0, v1
+		mtc0	a0, CP0_TCSTATUS
+		_ehb
+
 		.set	mips0
 #endif /* CONFIG_MIPS_MT_SMTC */
 		LONG_L	v1, PT_EPC(sp)

diff --git a/include/asm-x86/a.out-core.h b/include/asm-x86/a.out-core.h
index 714207a..f570576 100644
--- a/include/asm-x86/a.out-core.h
+++ b/include/asm-x86/a.out-core.h

@@ -9,8 +9,8 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
-#ifndef _ASM_A_OUT_CORE_H
-#define _ASM_A_OUT_CORE_H
+#ifndef ASM_X86__A_OUT_CORE_H
+#define ASM_X86__A_OUT_CORE_H
 
 #ifdef __KERNEL__
 #ifdef CONFIG_X86_32
@@ -70,4 +70,4 @@
 
 #endif /* CONFIG_X86_32 */
 #endif /* __KERNEL__ */
-#endif /* _ASM_A_OUT_CORE_H */
+#endif /* ASM_X86__A_OUT_CORE_H */

diff --git a/include/asm-x86/a.out.h b/include/asm-x86/a.out.h
index 4684f97..0948748 100644
--- a/include/asm-x86/a.out.h
+++ b/include/asm-x86/a.out.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_A_OUT_H
-#define _ASM_X86_A_OUT_H
+#ifndef ASM_X86__A_OUT_H
+#define ASM_X86__A_OUT_H
 
 struct exec
 {
@@ -17,4 +17,4 @@
 #define N_DRSIZE(a)	((a).a_drsize)
 #define N_SYMSIZE(a)	((a).a_syms)
 
-#endif /* _ASM_X86_A_OUT_H */
+#endif /* ASM_X86__A_OUT_H */

diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 35d1743..392e173 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ACPI_H
-#define _ASM_X86_ACPI_H
+#ifndef ASM_X86__ACPI_H
+#define ASM_X86__ACPI_H
 
 /*
  *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@ -175,4 +175,4 @@
 
 #define acpi_unlazy_tlb(x)	leave_mm(x)
 
-#endif /*__X86_ASM_ACPI_H*/
+#endif /* ASM_X86__ACPI_H */

diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h
index e4004a9..3617fd4 100644
--- a/include/asm-x86/agp.h
+++ b/include/asm-x86/agp.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_AGP_H
-#define _ASM_X86_AGP_H
+#ifndef ASM_X86__AGP_H
+#define ASM_X86__AGP_H
 
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -32,4 +32,4 @@
 #define free_gatt_pages(table, order)	\
 	free_pages((unsigned long)(table), (order))
 
-#endif
+#endif /* ASM_X86__AGP_H */

diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h
index f6aa18e..22d3c98 100644
--- a/include/asm-x86/alternative.h
+++ b/include/asm-x86/alternative.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ALTERNATIVE_H
-#define _ASM_X86_ALTERNATIVE_H
+#ifndef ASM_X86__ALTERNATIVE_H
+#define ASM_X86__ALTERNATIVE_H
 
 #include <linux/types.h>
 #include <linux/stddef.h>
@@ -180,4 +180,4 @@
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_early(void *addr, const void *opcode, size_t len);
 
-#endif /* _ASM_X86_ALTERNATIVE_H */
+#endif /* ASM_X86__ALTERNATIVE_H */

diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h
index 30a1204..783f43e 100644
--- a/include/asm-x86/amd_iommu.h
+++ b/include/asm-x86/amd_iommu.h

@@ -17,8 +17,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_AMD_IOMMU_H
-#define _ASM_X86_AMD_IOMMU_H
+#ifndef ASM_X86__AMD_IOMMU_H
+#define ASM_X86__AMD_IOMMU_H
 
 #ifdef CONFIG_AMD_IOMMU
 extern int amd_iommu_init(void);
@@ -29,4 +29,4 @@
 static inline void amd_iommu_detect(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__AMD_IOMMU_H */

diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index dcc8120..1ffa4e5 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h

@@ -17,8 +17,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef __AMD_IOMMU_TYPES_H__
-#define __AMD_IOMMU_TYPES_H__
+#ifndef ASM_X86__AMD_IOMMU_TYPES_H
+#define ASM_X86__AMD_IOMMU_TYPES_H
 
 #include <linux/types.h>
 #include <linux/list.h>
@@ -341,4 +341,4 @@
 	return (((u16)bus) << 8) | devfn;
 }
 
-#endif
+#endif /* ASM_X86__AMD_IOMMU_TYPES_H */

diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 133c998..65590c9 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_APIC_H
-#define _ASM_X86_APIC_H
+#ifndef ASM_X86__APIC_H
+#define ASM_X86__APIC_H
 
 #include <linux/pm.h>
 #include <linux/delay.h>
@@ -54,6 +54,11 @@
 #endif
 
 extern int is_vsmp_box(void);
+extern void xapic_wait_icr_idle(void);
+extern u32 safe_xapic_wait_icr_idle(void);
+extern u64 xapic_icr_read(void);
+extern void xapic_icr_write(u32, u32);
+extern int setup_profiling_timer(unsigned int);
 
 static inline void native_apic_write(unsigned long reg, u32 v)
 {
@@ -76,9 +81,7 @@
 static inline void ack_APIC_irq(void)
 {
 	/*
-	 * ack_APIC_irq() actually gets compiled as a single instruction:
-	 * - a single rmw on Pentium/82489DX
-	 * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+	 * ack_APIC_irq() actually gets compiled as a single instruction
 	 * ... yummie.
 	 */
 
@@ -128,4 +131,4 @@
 
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
-#endif /* __ASM_APIC_H */
+#endif /* ASM_X86__APIC_H */

diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 6b9008c..c40687d 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_APICDEF_H
-#define _ASM_X86_APICDEF_H
+#ifndef ASM_X86__APICDEF_H
+#define ASM_X86__APICDEF_H
 
 /*
  * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
@@ -411,4 +411,4 @@
 #else
  #define BAD_APICID 0xFFFFu
 #endif
-#endif
+#endif /* ASM_X86__APICDEF_H */

diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index 8411750..72adc3a 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_ARCH_HOOKS_H
-#define _ASM_ARCH_HOOKS_H
+#ifndef ASM_X86__ARCH_HOOKS_H
+#define ASM_X86__ARCH_HOOKS_H
 
 #include <linux/interrupt.h>
 
@@ -25,4 +25,4 @@
 extern void time_init_hook(void);
 extern void mca_nmi_hook(void);
 
-#endif
+#endif /* ASM_X86__ARCH_HOOKS_H */

diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 9722032..e1355f4 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ASM_H
-#define _ASM_X86_ASM_H
+#ifndef ASM_X86__ASM_H
+#define ASM_X86__ASM_H
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
@@ -20,17 +20,22 @@
 
 #define _ASM_PTR	__ASM_SEL(.long, .quad)
 #define _ASM_ALIGN	__ASM_SEL(.balign 4, .balign 8)
-#define _ASM_MOV_UL	__ASM_SIZE(mov)
 
+#define _ASM_MOV	__ASM_SIZE(mov)
 #define _ASM_INC	__ASM_SIZE(inc)
 #define _ASM_DEC	__ASM_SIZE(dec)
 #define _ASM_ADD	__ASM_SIZE(add)
 #define _ASM_SUB	__ASM_SIZE(sub)
 #define _ASM_XADD	__ASM_SIZE(xadd)
+
 #define _ASM_AX		__ASM_REG(ax)
 #define _ASM_BX		__ASM_REG(bx)
 #define _ASM_CX		__ASM_REG(cx)
 #define _ASM_DX		__ASM_REG(dx)
+#define _ASM_SP		__ASM_REG(sp)
+#define _ASM_BP		__ASM_REG(bp)
+#define _ASM_SI		__ASM_REG(si)
+#define _ASM_DI		__ASM_REG(di)
 
 /* Exception table entry */
 # define _ASM_EXTABLE(from,to) \
@@ -39,4 +44,4 @@
 	_ASM_PTR #from "," #to "\n" \
 	" .previous\n"
 
-#endif /* _ASM_X86_ASM_H */
+#endif /* ASM_X86__ASM_H */

diff --git a/include/asm-x86/atomic_32.h b/include/asm-x86/atomic_32.h
index 21a4825..14d3f0b 100644
--- a/include/asm-x86/atomic_32.h
+++ b/include/asm-x86/atomic_32.h

@@ -1,5 +1,5 @@
-#ifndef __ARCH_I386_ATOMIC__
-#define __ARCH_I386_ATOMIC__
+#ifndef ASM_X86__ATOMIC_32_H
+#define ASM_X86__ATOMIC_32_H
 
 #include <linux/compiler.h>
 #include <asm/processor.h>
@@ -256,4 +256,4 @@
 #define smp_mb__after_atomic_inc()	barrier()
 
 #include <asm-generic/atomic.h>
-#endif
+#endif /* ASM_X86__ATOMIC_32_H */

diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h
index 91c7d03..2cb218c 100644
--- a/include/asm-x86/atomic_64.h
+++ b/include/asm-x86/atomic_64.h

@@ -1,5 +1,5 @@
-#ifndef __ARCH_X86_64_ATOMIC__
-#define __ARCH_X86_64_ATOMIC__
+#ifndef ASM_X86__ATOMIC_64_H
+#define ASM_X86__ATOMIC_64_H
 
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
@@ -470,4 +470,4 @@
 #define smp_mb__after_atomic_inc()	barrier()
 
 #include <asm-generic/atomic.h>
-#endif
+#endif /* ASM_X86__ATOMIC_64_H */

diff --git a/include/asm-x86/auxvec.h b/include/asm-x86/auxvec.h
index 87f5e6d..12c7cac 100644
--- a/include/asm-x86/auxvec.h
+++ b/include/asm-x86/auxvec.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_AUXVEC_H
-#define _ASM_X86_AUXVEC_H
+#ifndef ASM_X86__AUXVEC_H
+#define ASM_X86__AUXVEC_H
 /*
  * Architecture-neutral AT_ values in 0-17, leave some room
  * for more of them, start the x86-specific ones at 32.
@@ -9,4 +9,4 @@
 #endif
 #define AT_SYSINFO_EHDR		33
 
-#endif
+#endif /* ASM_X86__AUXVEC_H */

diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h
index 0033e50..ec42ed8 100644
--- a/include/asm-x86/bios_ebda.h
+++ b/include/asm-x86/bios_ebda.h

@@ -1,5 +1,5 @@
-#ifndef _MACH_BIOS_EBDA_H
-#define _MACH_BIOS_EBDA_H
+#ifndef ASM_X86__BIOS_EBDA_H
+#define ASM_X86__BIOS_EBDA_H
 
 #include <asm/io.h>
 
@@ -16,4 +16,4 @@
 
 void reserve_ebda_region(void);
 
-#endif /* _MACH_BIOS_EBDA_H */
+#endif /* ASM_X86__BIOS_EBDA_H */

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index cfb2b64..61989b9 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BITOPS_H
-#define _ASM_X86_BITOPS_H
+#ifndef ASM_X86__BITOPS_H
+#define ASM_X86__BITOPS_H
 
 /*
  * Copyright 1992, Linus Torvalds.
@@ -458,4 +458,4 @@
 #include <asm-generic/bitops/minix.h>
 
 #endif /* __KERNEL__ */
-#endif	/* _ASM_X86_BITOPS_H */
+#endif /* ASM_X86__BITOPS_H */

diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h
index 2faed7e..825de5d 100644
--- a/include/asm-x86/boot.h
+++ b/include/asm-x86/boot.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_BOOT_H
-#define _ASM_BOOT_H
+#ifndef ASM_X86__BOOT_H
+#define ASM_X86__BOOT_H
 
 /* Don't touch these, unless you really know what you're doing. */
 #define DEF_INITSEG	0x9000
@@ -25,4 +25,4 @@
 #define BOOT_STACK_SIZE	0x1000
 #endif
 
-#endif /* _ASM_BOOT_H */
+#endif /* ASM_X86__BOOT_H */

diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index ae22bdf..ccf027e 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_BOOTPARAM_H
-#define _ASM_BOOTPARAM_H
+#ifndef ASM_X86__BOOTPARAM_H
+#define ASM_X86__BOOTPARAM_H
 
 #include <linux/types.h>
 #include <linux/screen_info.h>
@@ -108,4 +108,4 @@
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
-#endif /* _ASM_BOOTPARAM_H */
+#endif /* ASM_X86__BOOTPARAM_H */

diff --git a/include/asm-x86/bug.h b/include/asm-x86/bug.h
index b69aa64..91ad43a 100644
--- a/include/asm-x86/bug.h
+++ b/include/asm-x86/bug.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BUG_H
-#define _ASM_X86_BUG_H
+#ifndef ASM_X86__BUG_H
+#define ASM_X86__BUG_H
 
 #ifdef CONFIG_BUG
 #define HAVE_ARCH_BUG
@@ -36,4 +36,4 @@
 #endif /* !CONFIG_BUG */
 
 #include <asm-generic/bug.h>
-#endif
+#endif /* ASM_X86__BUG_H */

diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h
index 021cbdd..4761c46 100644
--- a/include/asm-x86/bugs.h
+++ b/include/asm-x86/bugs.h

@@ -1,7 +1,7 @@
-#ifndef _ASM_X86_BUGS_H
-#define _ASM_X86_BUGS_H
+#ifndef ASM_X86__BUGS_H
+#define ASM_X86__BUGS_H
 
 extern void check_bugs(void);
 int ppro_with_ram_bug(void);
 
-#endif /* _ASM_X86_BUGS_H */
+#endif /* ASM_X86__BUGS_H */

diff --git a/include/asm-x86/byteorder.h b/include/asm-x86/byteorder.h
index e02ae2d..722f27d 100644
--- a/include/asm-x86/byteorder.h
+++ b/include/asm-x86/byteorder.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BYTEORDER_H
-#define _ASM_X86_BYTEORDER_H
+#ifndef ASM_X86__BYTEORDER_H
+#define ASM_X86__BYTEORDER_H
 
 #include <asm/types.h>
 #include <linux/compiler.h>
@@ -78,4 +78,4 @@
 
 #include <linux/byteorder/little_endian.h>
 
-#endif /* _ASM_X86_BYTEORDER_H */
+#endif /* ASM_X86__BYTEORDER_H */

diff --git a/include/asm-x86/cache.h b/include/asm-x86/cache.h
index 1e0bac8..ea3f1cc 100644
--- a/include/asm-x86/cache.h
+++ b/include/asm-x86/cache.h

@@ -1,5 +1,5 @@
-#ifndef _ARCH_X86_CACHE_H
-#define _ARCH_X86_CACHE_H
+#ifndef ASM_X86__CACHE_H
+#define ASM_X86__CACHE_H
 
 /* L1 cache line size */
 #define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
@@ -17,4 +17,4 @@
 #endif
 #endif
 
-#endif
+#endif /* ASM_X86__CACHE_H */

diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index f4c0ab5..59859cb 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_CACHEFLUSH_H
-#define _ASM_X86_CACHEFLUSH_H
+#ifndef ASM_X86__CACHEFLUSH_H
+#define ASM_X86__CACHEFLUSH_H
 
 /* Keep includes the same across arches.  */
 #include <linux/mm.h>
@@ -112,4 +112,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__CACHEFLUSH_H */

diff --git a/include/asm-x86/calgary.h b/include/asm-x86/calgary.h
index 67f6040..933fd27 100644
--- a/include/asm-x86/calgary.h
+++ b/include/asm-x86/calgary.h

@@ -21,8 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_64_CALGARY_H
-#define _ASM_X86_64_CALGARY_H
+#ifndef ASM_X86__CALGARY_H
+#define ASM_X86__CALGARY_H
 
 #include <linux/spinlock.h>
 #include <linux/device.h>
@@ -69,4 +69,4 @@
 static inline void detect_calgary(void) { return; }
 #endif
 
-#endif /* _ASM_X86_64_CALGARY_H */
+#endif /* ASM_X86__CALGARY_H */

diff --git a/include/asm-x86/checksum_32.h b/include/asm-x86/checksum_32.h
index 52bbb0d..d041e8c 100644
--- a/include/asm-x86/checksum_32.h
+++ b/include/asm-x86/checksum_32.h

@@ -1,5 +1,5 @@
-#ifndef _I386_CHECKSUM_H
-#define _I386_CHECKSUM_H
+#ifndef ASM_X86__CHECKSUM_32_H
+#define ASM_X86__CHECKSUM_32_H
 
 #include <linux/in6.h>
 
@@ -186,4 +186,4 @@
 	return (__force __wsum)-1; /* invalid checksum */
 }
 
-#endif
+#endif /* ASM_X86__CHECKSUM_32_H */

diff --git a/include/asm-x86/checksum_64.h b/include/asm-x86/checksum_64.h
index 8bd861c..110f403 100644
--- a/include/asm-x86/checksum_64.h
+++ b/include/asm-x86/checksum_64.h

@@ -1,5 +1,5 @@
-#ifndef _X86_64_CHECKSUM_H
-#define _X86_64_CHECKSUM_H
+#ifndef ASM_X86__CHECKSUM_64_H
+#define ASM_X86__CHECKSUM_64_H
 
 /*
  * Checksums for x86-64
@@ -188,4 +188,4 @@
 	return a;
 }
 
-#endif
+#endif /* ASM_X86__CHECKSUM_64_H */

diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h
index bf5a69d..0622e45 100644
--- a/include/asm-x86/cmpxchg_32.h
+++ b/include/asm-x86/cmpxchg_32.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_CMPXCHG_H
-#define __ASM_CMPXCHG_H
+#ifndef ASM_X86__CMPXCHG_32_H
+#define ASM_X86__CMPXCHG_32_H
 
 #include <linux/bitops.h> /* for LOCK_PREFIX */
 
@@ -341,4 +341,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__CMPXCHG_32_H */

diff --git a/include/asm-x86/cmpxchg_64.h b/include/asm-x86/cmpxchg_64.h
index 17463cc..63c1a5e 100644
--- a/include/asm-x86/cmpxchg_64.h
+++ b/include/asm-x86/cmpxchg_64.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_CMPXCHG_H
-#define __ASM_CMPXCHG_H
+#ifndef ASM_X86__CMPXCHG_64_H
+#define ASM_X86__CMPXCHG_64_H
 
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
@@ -182,4 +182,4 @@
 	cmpxchg_local((ptr), (o), (n));					\
 })
 
-#endif
+#endif /* ASM_X86__CMPXCHG_64_H */

diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h
index 1793ac3..6732b15 100644
--- a/include/asm-x86/compat.h
+++ b/include/asm-x86/compat.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_COMPAT_H
-#define _ASM_X86_64_COMPAT_H
+#ifndef ASM_X86__COMPAT_H
+#define ASM_X86__COMPAT_H
 
 /*
  * Architecture specific compatibility types
@@ -215,4 +215,4 @@
 	return current_thread_info()->status & TS_COMPAT;
 }
 
-#endif /* _ASM_X86_64_COMPAT_H */
+#endif /* ASM_X86__COMPAT_H */

diff --git a/include/asm-x86/cpu.h b/include/asm-x86/cpu.h
index 73f2ea8..83a1150 100644
--- a/include/asm-x86/cpu.h
+++ b/include/asm-x86/cpu.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_I386_CPU_H_
-#define _ASM_I386_CPU_H_
+#ifndef ASM_X86__CPU_H
+#define ASM_X86__CPU_H
 
 #include <linux/device.h>
 #include <linux/cpu.h>
@@ -17,4 +17,4 @@
 #endif
 
 DECLARE_PER_CPU(int, cpu_state);
-#endif /* _ASM_I386_CPU_H_ */
+#endif /* ASM_X86__CPU_H */

diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index cfcfb0a..250fa0c 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h

@@ -1,8 +1,8 @@
 /*
  * Defines x86 CPU feature bits
  */
-#ifndef _ASM_X86_CPUFEATURE_H
-#define _ASM_X86_CPUFEATURE_H
+#ifndef ASM_X86__CPUFEATURE_H
+#define ASM_X86__CPUFEATURE_H
 
 #include <asm/required-features.h>
 
@@ -224,4 +224,4 @@
 
 #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
 
-#endif /* _ASM_X86_CPUFEATURE_H */
+#endif /* ASM_X86__CPUFEATURE_H */

diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h
index 7515c19..a863ead 100644
--- a/include/asm-x86/current.h
+++ b/include/asm-x86/current.h

@@ -1,5 +1,5 @@
-#ifndef _X86_CURRENT_H
-#define _X86_CURRENT_H
+#ifndef ASM_X86__CURRENT_H
+#define ASM_X86__CURRENT_H
 
 #ifdef CONFIG_X86_32
 #include <linux/compiler.h>
@@ -36,4 +36,4 @@
 
 #define current get_current()
 
-#endif /* X86_CURRENT_H */
+#endif /* ASM_X86__CURRENT_H */

diff --git a/include/asm-x86/debugreg.h b/include/asm-x86/debugreg.h
index c6344d5..ecb6907 100644
--- a/include/asm-x86/debugreg.h
+++ b/include/asm-x86/debugreg.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DEBUGREG_H
-#define _ASM_X86_DEBUGREG_H
+#ifndef ASM_X86__DEBUGREG_H
+#define ASM_X86__DEBUGREG_H
 
 
 /* Indicate the register numbers for a number of the specific
@@ -67,4 +67,4 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
-#endif
+#endif /* ASM_X86__DEBUGREG_H */

diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h
index 409a649..8a0da95 100644
--- a/include/asm-x86/delay.h
+++ b/include/asm-x86/delay.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DELAY_H
-#define _ASM_X86_DELAY_H
+#ifndef ASM_X86__DELAY_H
+#define ASM_X86__DELAY_H
 
 /*
  * Copyright (C) 1993 Linus Torvalds
@@ -28,4 +28,4 @@
 
 void use_tsc_delay(void);
 
-#endif /* _ASM_X86_DELAY_H */
+#endif /* ASM_X86__DELAY_H */

diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index a44c4dc..b73fea5 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_DESC_H_
-#define _ASM_DESC_H_
+#ifndef ASM_X86__DESC_H
+#define ASM_X86__DESC_H
 
 #ifndef __ASSEMBLY__
 #include <asm/desc_defs.h>
@@ -397,4 +397,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__DESC_H */

diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index f7bacf3..b881db6 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h

@@ -1,6 +1,6 @@
 /* Written 2000 by Andi Kleen */
-#ifndef __ARCH_DESC_DEFS_H
-#define __ARCH_DESC_DEFS_H
+#ifndef ASM_X86__DESC_DEFS_H
+#define ASM_X86__DESC_DEFS_H
 
 /*
  * Segment descriptor structure definitions, usable from both x86_64 and i386
@@ -92,4 +92,4 @@
 
 #endif /* !__ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__DESC_DEFS_H */

diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h
index 3c034f4..1bece04 100644
--- a/include/asm-x86/device.h
+++ b/include/asm-x86/device.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DEVICE_H
-#define _ASM_X86_DEVICE_H
+#ifndef ASM_X86__DEVICE_H
+#define ASM_X86__DEVICE_H
 
 struct dev_archdata {
 #ifdef CONFIG_ACPI
@@ -13,4 +13,4 @@
 #endif
 };
 
-#endif /* _ASM_X86_DEVICE_H */
+#endif /* ASM_X86__DEVICE_H */

diff --git a/include/asm-x86/div64.h b/include/asm-x86/div64.h
index 9a2d644..f9530f2 100644
--- a/include/asm-x86/div64.h
+++ b/include/asm-x86/div64.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DIV64_H
-#define _ASM_X86_DIV64_H
+#ifndef ASM_X86__DIV64_H
+#define ASM_X86__DIV64_H
 
 #ifdef CONFIG_X86_32
 
@@ -57,4 +57,4 @@
 # include <asm-generic/div64.h>
 #endif /* CONFIG_X86_32 */
 
-#endif /* _ASM_X86_DIV64_H */
+#endif /* ASM_X86__DIV64_H */

diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index ad9cd6d..5d200e7 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_DMA_MAPPING_H_
-#define _ASM_DMA_MAPPING_H_
+#ifndef ASM_X86__DMA_MAPPING_H
+#define ASM_X86__DMA_MAPPING_H
 
 /*
  * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
@@ -250,4 +250,4 @@
 #define dma_is_consistent(d, h)	(1)
 
 #include <asm-generic/dma-coherent.h>
-#endif
+#endif /* ASM_X86__DMA_MAPPING_H */

diff --git a/include/asm-x86/dma.h b/include/asm-x86/dma.h
index ca1098a..c9f7a4e 100644
--- a/include/asm-x86/dma.h
+++ b/include/asm-x86/dma.h

@@ -5,8 +5,8 @@
  * and John Boyd, Nov. 1992.
  */
 
-#ifndef _ASM_X86_DMA_H
-#define _ASM_X86_DMA_H
+#ifndef ASM_X86__DMA_H
+#define ASM_X86__DMA_H
 
 #include <linux/spinlock.h>	/* And spinlocks */
 #include <asm/io.h>		/* need byte IO */
@@ -315,4 +315,4 @@
 #define isa_dma_bridge_buggy	(0)
 #endif
 
-#endif /* _ASM_X86_DMA_H */
+#endif /* ASM_X86__DMA_H */

diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h
index 58a8657..1cff6fe 100644
--- a/include/asm-x86/dmi.h
+++ b/include/asm-x86/dmi.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_DMI_H
-#define _ASM_X86_DMI_H
+#ifndef ASM_X86__DMI_H
+#define ASM_X86__DMI_H
 
 #include <asm/io.h>
 
@@ -23,4 +23,4 @@
 #define dmi_ioremap early_ioremap
 #define dmi_iounmap early_iounmap
 
-#endif
+#endif /* ASM_X86__DMI_H */

diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h
index 7881368..c3c953a 100644
--- a/include/asm-x86/ds.h
+++ b/include/asm-x86/ds.h

@@ -2,71 +2,237 @@
  * Debug Store (DS) support
  *
  * This provides a low-level interface to the hardware's Debug Store
- * feature that is used for last branch recording (LBR) and
+ * feature that is used for branch trace store (BTS) and
  * precise-event based sampling (PEBS).
  *
- * Different architectures use a different DS layout/pointer size.
- * The below functions therefore work on a void*.
+ * It manages:
+ * - per-thread and per-cpu allocation of BTS and PEBS
+ * - buffer memory allocation (optional)
+ * - buffer overflow handling
+ * - buffer access
+ *
+ * It assumes:
+ * - get_task_struct on all parameter tasks
+ * - current is allowed to trace parameter tasks
  *
  *
- * Since there is no user for PEBS, yet, only LBR (or branch
- * trace store, BTS) is supported.
- *
- *
- * Copyright (C) 2007 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
+ * Copyright (C) 2007-2008 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
  */
 
-#ifndef _ASM_X86_DS_H
-#define _ASM_X86_DS_H
+#ifndef ASM_X86__DS_H
+#define ASM_X86__DS_H
+
+#ifdef CONFIG_X86_DS
 
 #include <linux/types.h>
 #include <linux/init.h>
 
-struct cpuinfo_x86;
 
+struct task_struct;
 
-/* a branch trace record entry
+/*
+ * Request BTS or PEBS
  *
- * In order to unify the interface between various processor versions,
- * we use the below data structure for all processors.
+ * Due to alignment constraints, the actual buffer may be slightly
+ * smaller than the requested or provided buffer.
+ *
+ * Returns 0 on success; -Eerrno otherwise
+ *
+ * task: the task to request recording for;
+ *       NULL for per-cpu recording on the current cpu
+ * base: the base pointer for the (non-pageable) buffer;
+ *       NULL if buffer allocation requested
+ * size: the size of the requested or provided buffer
+ * ovfl: pointer to a function to be called on buffer overflow;
+ *       NULL if cyclic buffer requested
  */
-enum bts_qualifier {
-	BTS_INVALID = 0,
-	BTS_BRANCH,
-	BTS_TASK_ARRIVES,
-	BTS_TASK_DEPARTS
+typedef void (*ds_ovfl_callback_t)(struct task_struct *);
+extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
+			  ds_ovfl_callback_t ovfl);
+extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
+			   ds_ovfl_callback_t ovfl);
+
+/*
+ * Release BTS or PEBS resources
+ *
+ * Frees buffers allocated on ds_request.
+ *
+ * Returns 0 on success; -Eerrno otherwise
+ *
+ * task: the task to release resources for;
+ *       NULL to release resources for the current cpu
+ */
+extern int ds_release_bts(struct task_struct *task);
+extern int ds_release_pebs(struct task_struct *task);
+
+/*
+ * Return the (array) index of the write pointer.
+ * (assuming an array of BTS/PEBS records)
+ *
+ * Returns -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * pos (out): if not NULL, will hold the result
+ */
+extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
+extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
+
+/*
+ * Return the (array) index one record beyond the end of the array.
+ * (assuming an array of BTS/PEBS records)
+ *
+ * Returns -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * pos (out): if not NULL, will hold the result
+ */
+extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
+extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
+
+/*
+ * Provide a pointer to the BTS/PEBS record at parameter index.
+ * (assuming an array of BTS/PEBS records)
+ *
+ * The pointer points directly into the buffer. The user is
+ * responsible for copying the record.
+ *
+ * Returns the size of a single record on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * index: the index of the requested record
+ * record (out): pointer to the requested record
+ */
+extern int ds_access_bts(struct task_struct *task,
+			 size_t index, const void **record);
+extern int ds_access_pebs(struct task_struct *task,
+			  size_t index, const void **record);
+
+/*
+ * Write one or more BTS/PEBS records at the write pointer index and
+ * advance the write pointer.
+ *
+ * If size is not a multiple of the record size, trailing bytes are
+ * zeroed out.
+ *
+ * May result in one or more overflow notifications.
+ *
+ * If called during overflow handling, that is, with index >=
+ * interrupt threshold, the write will wrap around.
+ *
+ * An overflow notification is given if and when the interrupt
+ * threshold is reached during or after the write.
+ *
+ * Returns the number of bytes written or -Eerrno.
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * buffer: the buffer to write
+ * size: the size of the buffer
+ */
+extern int ds_write_bts(struct task_struct *task,
+			const void *buffer, size_t size);
+extern int ds_write_pebs(struct task_struct *task,
+			 const void *buffer, size_t size);
+
+/*
+ * Same as ds_write_bts/pebs, but omit ownership checks.
+ *
+ * This is needed to have some other task than the owner of the
+ * BTS/PEBS buffer or the parameter task itself write into the
+ * respective buffer.
+ */
+extern int ds_unchecked_write_bts(struct task_struct *task,
+				  const void *buffer, size_t size);
+extern int ds_unchecked_write_pebs(struct task_struct *task,
+				   const void *buffer, size_t size);
+
+/*
+ * Reset the write pointer of the BTS/PEBS buffer.
+ *
+ * Returns 0 on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ */
+extern int ds_reset_bts(struct task_struct *task);
+extern int ds_reset_pebs(struct task_struct *task);
+
+/*
+ * Clear the BTS/PEBS buffer and reset the write pointer.
+ * The entire buffer will be zeroed out.
+ *
+ * Returns 0 on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ */
+extern int ds_clear_bts(struct task_struct *task);
+extern int ds_clear_pebs(struct task_struct *task);
+
+/*
+ * Provide the PEBS counter reset value.
+ *
+ * Returns 0 on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * value (out): the counter reset value
+ */
+extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
+
+/*
+ * Set the PEBS counter reset value.
+ *
+ * Returns 0 on success; -Eerrno on error
+ *
+ * task: the task to access;
+ *       NULL to access the current cpu
+ * value: the new counter reset value
+ */
+extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
+
+/*
+ * Initialization
+ */
+struct cpuinfo_x86;
+extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
+
+
+
+/*
+ * The DS context - part of struct thread_struct.
+ */
+struct ds_context {
+	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
+	unsigned char *ds;
+	/* the owner of the BTS and PEBS configuration, respectively */
+	struct task_struct *owner[2];
+	/* buffer overflow notification function for BTS and PEBS */
+	ds_ovfl_callback_t callback[2];
+	/* the original buffer address */
+	void *buffer[2];
+	/* the number of allocated pages for on-request allocated buffers */
+	unsigned int pages[2];
+	/* use count */
+	unsigned long count;
+	/* a pointer to the context location inside the thread_struct
+	 * or the per_cpu context array */
+	struct ds_context **this;
+	/* a pointer to the task owning this context, or NULL, if the
+	 * context is owned by a cpu */
+	struct task_struct *task;
 };
 
-struct bts_struct {
-	u64 qualifier;
-	union {
-		/* BTS_BRANCH */
-		struct {
-			u64 from_ip;
-			u64 to_ip;
-		} lbr;
-		/* BTS_TASK_ARRIVES or
-		   BTS_TASK_DEPARTS */
-		u64 jiffies;
-	} variant;
-};
+/* called by exit_thread() to free leftover contexts */
+extern void ds_free(struct ds_context *context);
 
-/* Overflow handling mechanisms */
-#define DS_O_SIGNAL	1 /* send overflow signal */
-#define DS_O_WRAP	2 /* wrap around */
+#else /* CONFIG_X86_DS */
 
-extern int ds_allocate(void **, size_t);
-extern int ds_free(void **);
-extern int ds_get_bts_size(void *);
-extern int ds_get_bts_end(void *);
-extern int ds_get_bts_index(void *);
-extern int ds_set_overflow(void *, int);
-extern int ds_get_overflow(void *);
-extern int ds_clear(void *);
-extern int ds_read_bts(void *, int, struct bts_struct *);
-extern int ds_write_bts(void *, const struct bts_struct *);
-extern unsigned long ds_debugctl_mask(void);
-extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c);
+#define ds_init_intel(config) do {} while (0)
 
-#endif /* _ASM_X86_DS_H */
+#endif /* CONFIG_X86_DS */
+#endif /* ASM_X86__DS_H */

diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h
index 738bb9f..21d1bc3 100644
--- a/include/asm-x86/dwarf2.h
+++ b/include/asm-x86/dwarf2.h

@@ -1,5 +1,5 @@
-#ifndef _DWARF2_H
-#define _DWARF2_H
+#ifndef ASM_X86__DWARF2_H
+#define ASM_X86__DWARF2_H
 
 #ifndef __ASSEMBLY__
 #warning "asm/dwarf2.h should be only included in pure assembly files"
@@ -58,4 +58,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__DWARF2_H */

diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 16a31e2..f52daf1 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_E820_H
-#define __ASM_E820_H
+#ifndef ASM_X86__E820_H
+#define ASM_X86__E820_H
 #define E820MAP	0x2d0		/* our map */
 #define E820MAX	128		/* number of entries in E820MAP */
 
@@ -64,6 +64,7 @@
 extern struct e820map e820;
 extern struct e820map e820_saved;
 
+extern unsigned long pci_mem_start;
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern int e820_all_mapped(u64 start, u64 end, unsigned type);
 extern void e820_add_region(u64 start, u64 size, int type);
@@ -140,4 +141,4 @@
 #define HIGH_MEMORY	(1024*1024)
 #endif /* __KERNEL__ */
 
-#endif  /* __ASM_E820_H */
+#endif /* ASM_X86__E820_H */

diff --git a/include/asm-x86/edac.h b/include/asm-x86/edac.h
index a8088f6..9493c5b 100644
--- a/include/asm-x86/edac.h
+++ b/include/asm-x86/edac.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_EDAC_H
-#define _ASM_X86_EDAC_H
+#ifndef ASM_X86__EDAC_H
+#define ASM_X86__EDAC_H
 
 /* ECC atomic, DMA, SMP and interrupt safe scrub function */
 
@@ -15,4 +15,4 @@
 		asm volatile("lock; addl $0, %0"::"m" (*virt_addr));
 }
 
-#endif
+#endif /* ASM_X86__EDAC_H */

diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index d4f2b0a..ed2de22 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_EFI_H
-#define _ASM_X86_EFI_H
+#ifndef ASM_X86__EFI_H
+#define ASM_X86__EFI_H
 
 #ifdef CONFIG_X86_32
 
@@ -94,4 +94,4 @@
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
-#endif
+#endif /* ASM_X86__EFI_H */

diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 7be4733..5c4745b 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_ELF_H
-#define _ASM_X86_ELF_H
+#ifndef ASM_X86__ELF_H
+#define ASM_X86__ELF_H
 
 /*
  * ELF register definitions..
@@ -148,8 +148,9 @@
 
 static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp)
 {
-	asm volatile("movl %0,%%fs" :: "r" (0));
-	asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS));
+	loadsegment(fs, 0);
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 	load_gs_index(0);
 	regs->ip = ip;
 	regs->sp = sp;
@@ -332,4 +333,4 @@
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
-#endif
+#endif /* ASM_X86__ELF_H */

diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h
index 8e6aef1..190d0d8 100644
--- a/include/asm-x86/emergency-restart.h
+++ b/include/asm-x86/emergency-restart.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
+#ifndef ASM_X86__EMERGENCY_RESTART_H
+#define ASM_X86__EMERGENCY_RESTART_H
 
 enum reboot_type {
 	BOOT_TRIPLE = 't',
@@ -15,4 +15,4 @@
 
 extern void machine_emergency_restart(void);
 
-#endif /* _ASM_EMERGENCY_RESTART_H */
+#endif /* ASM_X86__EMERGENCY_RESTART_H */

diff --git a/include/asm-x86/fb.h b/include/asm-x86/fb.h
index 5301846..aca38db 100644
--- a/include/asm-x86/fb.h
+++ b/include/asm-x86/fb.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FB_H
-#define _ASM_X86_FB_H
+#ifndef ASM_X86__FB_H
+#define ASM_X86__FB_H
 
 #include <linux/fb.h>
 #include <linux/fs.h>
@@ -18,4 +18,4 @@
 static inline int fb_is_primary_device(struct fb_info *info) { return 0; }
 #endif
 
-#endif /* _ASM_X86_FB_H */
+#endif /* ASM_X86__FB_H */

diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 44d4f82..78e33a1 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_FIXMAP_H
-#define _ASM_FIXMAP_H
+#ifndef ASM_X86__FIXMAP_H
+#define ASM_X86__FIXMAP_H
 
 #ifdef CONFIG_X86_32
 # include "fixmap_32.h"
@@ -65,4 +65,4 @@
 	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
 	return __virt_to_fix(vaddr);
 }
-#endif
+#endif /* ASM_X86__FIXMAP_H */

diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index f1ac2b2..784e3e7 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h

@@ -10,8 +10,8 @@
  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
  */
 
-#ifndef _ASM_FIXMAP_32_H
-#define _ASM_FIXMAP_32_H
+#ifndef ASM_X86__FIXMAP_32_H
+#define ASM_X86__FIXMAP_32_H
 
 
 /* used by vmalloc.c, vsyscall.lds.S.
@@ -120,4 +120,4 @@
 #define FIXADDR_BOOT_START	(FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
 
 #endif /* !__ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__FIXMAP_32_H */

diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 00f3d74..dafb24b 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h

@@ -8,8 +8,8 @@
  * Copyright (C) 1998 Ingo Molnar
  */
 
-#ifndef _ASM_FIXMAP_64_H
-#define _ASM_FIXMAP_64_H
+#ifndef ASM_X86__FIXMAP_64_H
+#define ASM_X86__FIXMAP_64_H
 
 #include <linux/kernel.h>
 #include <asm/acpi.h>
@@ -80,4 +80,4 @@
 #define FIXADDR_USER_START	((unsigned long)VSYSCALL32_VSYSCALL)
 #define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
 
-#endif
+#endif /* ASM_X86__FIXMAP_64_H */

diff --git a/include/asm-x86/floppy.h b/include/asm-x86/floppy.h
index dbe82a5..7d83a3a 100644
--- a/include/asm-x86/floppy.h
+++ b/include/asm-x86/floppy.h

@@ -7,8 +7,8 @@
  *
  * Copyright (C) 1995
  */
-#ifndef _ASM_X86_FLOPPY_H
-#define _ASM_X86_FLOPPY_H
+#ifndef ASM_X86__FLOPPY_H
+#define ASM_X86__FLOPPY_H
 
 #include <linux/vmalloc.h>
 
@@ -278,4 +278,4 @@
 
 #define EXTRA_FLOPPY_PARAMS
 
-#endif /* _ASM_X86_FLOPPY_H */
+#endif /* ASM_X86__FLOPPY_H */

diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index 5c68b32..be0e004 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FTRACE
-#define _ASM_X86_FTRACE
+#ifndef ASM_X86__FTRACE_H
+#define ASM_X86__FTRACE_H
 
 #ifdef CONFIG_FTRACE
 #define MCOUNT_ADDR		((long)(mcount))
@@ -11,4 +11,4 @@
 
 #endif /* CONFIG_FTRACE */
 
-#endif /* _ASM_X86_FTRACE */
+#endif /* ASM_X86__FTRACE_H */

diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index e7a76b3..06b924e 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_FUTEX_H
-#define _ASM_X86_FUTEX_H
+#ifndef ASM_X86__FUTEX_H
+#define ASM_X86__FUTEX_H
 
 #ifdef __KERNEL__
 
@@ -25,7 +25,7 @@
 	asm volatile("1:\tmovl	%2, %0\n"			\
 		     "\tmovl\t%0, %3\n"				\
 		     "\t" insn "\n"				\
-		     "2:\tlock; cmpxchgl %3, %2\n"		\
+		     "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"	\
 		     "\tjnz\t1b\n"				\
 		     "3:\t.section .fixup,\"ax\"\n"		\
 		     "4:\tmov\t%5, %1\n"			\
@@ -64,7 +64,7 @@
 		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
 		break;
 	case FUTEX_OP_ADD:
-		__futex_atomic_op1("lock; xaddl %0, %2", ret, oldval,
+		__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
 				   uaddr, oparg);
 		break;
 	case FUTEX_OP_OR:
@@ -122,7 +122,7 @@
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-	asm volatile("1:\tlock; cmpxchgl %3, %1\n"
+	asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n"
 		     "2:\t.section .fixup, \"ax\"\n"
 		     "3:\tmov     %2, %0\n"
 		     "\tjmp     2b\n"
@@ -137,4 +137,4 @@
 }
 
 #endif
-#endif
+#endif /* ASM_X86__FUTEX_H */

diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 3f62a83..baa54fa 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_GART_H
-#define _ASM_X8664_GART_H 1
+#ifndef ASM_X86__GART_H
+#define ASM_X86__GART_H
 
 #include <asm/e820.h>
 
@@ -52,15 +52,15 @@
 		return 0;
 
 	if (aper_base + aper_size > 0x100000000ULL) {
-		printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
+		printk(KERN_INFO "Aperture beyond 4GB. Ignoring.\n");
 		return 0;
 	}
 	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
-		printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
+		printk(KERN_INFO "Aperture pointing to e820 RAM. Ignoring.\n");
 		return 0;
 	}
 	if (aper_size < min_size) {
-		printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n",
+		printk(KERN_INFO "Aperture too small (%d MB) than (%d MB)\n",
 				 aper_size>>20, min_size>>20);
 		return 0;
 	}
@@ -68,4 +68,4 @@
 	return 1;
 }
 
-#endif
+#endif /* ASM_X86__GART_H */

diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index 754d635..34280f0 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_GENAPIC_H
-#define _ASM_GENAPIC_H 1
+#ifndef ASM_X86__GENAPIC_32_H
+#define ASM_X86__GENAPIC_32_H
 
 #include <asm/mpspec.h>
 
@@ -121,4 +121,4 @@
 #define uv_system_init()		do {} while (0)
 
 
-#endif
+#endif /* ASM_X86__GENAPIC_32_H */

diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index a47d631..25097a8 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_GENAPIC_H
-#define _ASM_GENAPIC_H 1
+#ifndef ASM_X86__GENAPIC_64_H
+#define ASM_X86__GENAPIC_64_H
 
 /*
  * Copyright 2004 James Cleverdon, IBM.
@@ -47,4 +47,4 @@
 
 extern void setup_apic_routing(void);
 
-#endif
+#endif /* ASM_X86__GENAPIC_64_H */

diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h
index 2c1cda0..3f3444b 100644
--- a/include/asm-x86/geode.h
+++ b/include/asm-x86/geode.h

@@ -7,8 +7,8 @@
  * as published by the Free Software Foundation.
  */
 
-#ifndef _ASM_GEODE_H_
-#define _ASM_GEODE_H_
+#ifndef ASM_X86__GEODE_H
+#define ASM_X86__GEODE_H
 
 #include <asm/processor.h>
 #include <linux/io.h>
@@ -250,4 +250,4 @@
 static inline int mfgpt_timer_setup(void) { return 0; }
 #endif
 
-#endif
+#endif /* ASM_X86__GEODE_H */

diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h
index c4c91b3..497fb98 100644
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h

@@ -53,4 +53,4 @@
 
 #endif /* CONFIG_GPIOLIB */
 
-#endif /* _ASM_I386_GPIO_H */
+#endif /* ASM_X86__GPIO_H */

diff --git a/include/asm-x86/hardirq_32.h b/include/asm-x86/hardirq_32.h
index 4f85f0f..700fe23 100644
--- a/include/asm-x86/hardirq_32.h
+++ b/include/asm-x86/hardirq_32.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
+#ifndef ASM_X86__HARDIRQ_32_H
+#define ASM_X86__HARDIRQ_32_H
 
 #include <linux/threads.h>
 #include <linux/irq.h>
@@ -25,4 +25,4 @@
 void ack_bad_irq(unsigned int irq);
 #include <linux/irq_cpustat.h>
 
-#endif /* __ASM_HARDIRQ_H */
+#endif /* ASM_X86__HARDIRQ_32_H */

diff --git a/include/asm-x86/hardirq_64.h b/include/asm-x86/hardirq_64.h
index 95d5e09..f8bd291 100644
--- a/include/asm-x86/hardirq_64.h
+++ b/include/asm-x86/hardirq_64.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
+#ifndef ASM_X86__HARDIRQ_64_H
+#define ASM_X86__HARDIRQ_64_H
 
 #include <linux/threads.h>
 #include <linux/irq.h>
@@ -20,4 +20,4 @@
 
 extern void ack_bad_irq(unsigned int irq);
 
-#endif /* __ASM_HARDIRQ_H */
+#endif /* ASM_X86__HARDIRQ_64_H */

diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h
index 4514b16..bc3f6a2 100644
--- a/include/asm-x86/highmem.h
+++ b/include/asm-x86/highmem.h

@@ -15,8 +15,8 @@
  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
  */
 
-#ifndef _ASM_HIGHMEM_H
-#define _ASM_HIGHMEM_H
+#ifndef ASM_X86__HIGHMEM_H
+#define ASM_X86__HIGHMEM_H
 
 #ifdef __KERNEL__
 
@@ -79,4 +79,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* _ASM_HIGHMEM_H */
+#endif /* ASM_X86__HIGHMEM_H */

diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index 82f1ac6..cbbbb6d 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h

@@ -1,5 +1,5 @@
-#ifndef ASM_X86_HPET_H
-#define ASM_X86_HPET_H
+#ifndef ASM_X86__HPET_H
+#define ASM_X86__HPET_H
 
 #ifdef CONFIG_HPET_TIMER
 
@@ -90,4 +90,4 @@
 #define hpet_readl(a) 0
 
 #endif
-#endif /* ASM_X86_HPET_H */
+#endif /* ASM_X86__HPET_H */

diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h
index 439a9ac..0b7ec5d 100644
--- a/include/asm-x86/hugetlb.h
+++ b/include/asm-x86/hugetlb.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_HUGETLB_H
-#define _ASM_X86_HUGETLB_H
+#ifndef ASM_X86__HUGETLB_H
+#define ASM_X86__HUGETLB_H
 
 #include <asm/page.h>
 
@@ -90,4 +90,4 @@
 {
 }
 
-#endif /* _ASM_X86_HUGETLB_H */
+#endif /* ASM_X86__HUGETLB_H */

diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index edd0b95..65997b1 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_HW_IRQ_H
-#define _ASM_HW_IRQ_H
+#ifndef ASM_X86__HW_IRQ_H
+#define ASM_X86__HW_IRQ_H
 
 /*
  * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
@@ -93,6 +93,26 @@
 extern asmlinkage void qic_enable_irq_interrupt(void);
 extern asmlinkage void qic_call_function_interrupt(void);
 
+/* SMP */
+extern void smp_apic_timer_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_32
+extern void smp_spurious_interrupt(struct pt_regs *);
+extern void smp_error_interrupt(struct pt_regs *);
+#else
+extern asmlinkage void smp_spurious_interrupt(void);
+extern asmlinkage void smp_error_interrupt(void);
+#endif
+#ifdef CONFIG_X86_SMP
+extern void smp_reschedule_interrupt(struct pt_regs *);
+extern void smp_call_function_interrupt(struct pt_regs *);
+extern void smp_call_function_single_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_32
+extern void smp_invalidate_interrupt(struct pt_regs *);
+#else
+extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
+#endif
+#endif
+
 #ifdef CONFIG_X86_32
 extern void (*const interrupt[NR_IRQS])(void);
 #else
@@ -112,4 +132,4 @@
 
 #endif /* !ASSEMBLY_ */
 
-#endif
+#endif /* ASM_X86__HW_IRQ_H */

diff --git a/include/asm-x86/hypertransport.h b/include/asm-x86/hypertransport.h
index d2bbd23..cc011a3 100644
--- a/include/asm-x86/hypertransport.h
+++ b/include/asm-x86/hypertransport.h

@@ -1,5 +1,5 @@
-#ifndef ASM_HYPERTRANSPORT_H
-#define ASM_HYPERTRANSPORT_H
+#ifndef ASM_X86__HYPERTRANSPORT_H
+#define ASM_X86__HYPERTRANSPORT_H
 
 /*
  * Constants for x86 Hypertransport Interrupts.
@@ -42,4 +42,4 @@
 #define HT_IRQ_HIGH_DEST_ID(v)						\
 	((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
 
-#endif /* ASM_HYPERTRANSPORT_H */
+#endif /* ASM_X86__HYPERTRANSPORT_H */

diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 56d00e3..1ecdc3e 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h

@@ -7,8 +7,8 @@
  * x86-64 work by Andi Kleen 2002
  */
 
-#ifndef _ASM_X86_I387_H
-#define _ASM_X86_I387_H
+#ifndef ASM_X86__I387_H
+#define ASM_X86__I387_H
 
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
@@ -25,6 +25,7 @@
 extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
 extern void init_thread_xstate(void);
+extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
@@ -336,4 +337,4 @@
 	}
 }
 
-#endif	/* _ASM_X86_I387_H */
+#endif /* ASM_X86__I387_H */

diff --git a/include/asm-x86/i8253.h b/include/asm-x86/i8253.h
index b51c048..15a5b53 100644
--- a/include/asm-x86/i8253.h
+++ b/include/asm-x86/i8253.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_I8253_H__
-#define __ASM_I8253_H__
+#ifndef ASM_X86__I8253_H
+#define ASM_X86__I8253_H
 
 /* i8253A PIT registers */
 #define PIT_MODE		0x43
@@ -15,4 +15,4 @@
 #define inb_pit		inb_p
 #define outb_pit	outb_p
 
-#endif	/* __ASM_I8253_H__ */
+#endif /* ASM_X86__I8253_H */

diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 2f98df9..c586559 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_I8259_H__
-#define __ASM_I8259_H__
+#ifndef ASM_X86__I8259_H
+#define ASM_X86__I8259_H
 
 #include <linux/delay.h>
 
@@ -57,4 +57,4 @@
 
 extern struct irq_chip i8259A_chip;
 
-#endif	/* __ASM_I8259_H__ */
+#endif /* ASM_X86__I8259_H */

diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h
index 55d3abe..f932f7a 100644
--- a/include/asm-x86/ia32.h
+++ b/include/asm-x86/ia32.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IA32_H
-#define _ASM_X86_64_IA32_H
+#ifndef ASM_X86__IA32_H
+#define ASM_X86__IA32_H
 
 
 #ifdef CONFIG_IA32_EMULATION
@@ -167,4 +167,4 @@
 
 #endif /* !CONFIG_IA32_SUPPORT */
 
-#endif
+#endif /* ASM_X86__IA32_H */

diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h
index 61cea9e..dbd887d 100644
--- a/include/asm-x86/ia32_unistd.h
+++ b/include/asm-x86/ia32_unistd.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IA32_UNISTD_H_
-#define _ASM_X86_64_IA32_UNISTD_H_
+#ifndef ASM_X86__IA32_UNISTD_H
+#define ASM_X86__IA32_UNISTD_H
 
 /*
  * This file contains the system call numbers of the ia32 port,
@@ -15,4 +15,4 @@
 #define __NR_ia32_sigreturn	119
 #define __NR_ia32_rt_sigreturn	173
 
-#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
+#endif /* ASM_X86__IA32_UNISTD_H */

diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h
index cbb6491..baa3f78 100644
--- a/include/asm-x86/idle.h
+++ b/include/asm-x86/idle.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_IDLE_H
-#define _ASM_X86_64_IDLE_H 1
+#ifndef ASM_X86__IDLE_H
+#define ASM_X86__IDLE_H
 
 #define IDLE_START 1
 #define IDLE_END 2
@@ -12,4 +12,4 @@
 
 void c1e_remove_cpu(int cpu);
 
-#endif
+#endif /* ASM_X86__IDLE_H */

diff --git a/include/asm-x86/intel_arch_perfmon.h b/include/asm-x86/intel_arch_perfmon.h
index fa0fd06..07c03c6 100644
--- a/include/asm-x86/intel_arch_perfmon.h
+++ b/include/asm-x86/intel_arch_perfmon.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H
-#define _ASM_X86_INTEL_ARCH_PERFMON_H
+#ifndef ASM_X86__INTEL_ARCH_PERFMON_H
+#define ASM_X86__INTEL_ARCH_PERFMON_H
 
 #define MSR_ARCH_PERFMON_PERFCTR0		0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1		0xc2
@@ -28,4 +28,4 @@
 	unsigned int full;
 };
 
-#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */
+#endif /* ASM_X86__INTEL_ARCH_PERFMON_H */

diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h
index 0f954dc..72b7719 100644
--- a/include/asm-x86/io.h
+++ b/include/asm-x86/io.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IO_H
-#define _ASM_X86_IO_H
+#ifndef ASM_X86__IO_H
+#define ASM_X86__IO_H
 
 #define ARCH_HAS_IOREMAP_WC
 
@@ -73,6 +73,8 @@
 #define writeq writeq
 #endif
 
+extern int iommu_bio_merge;
+
 #ifdef CONFIG_X86_32
 # include "io_32.h"
 #else
@@ -99,4 +101,4 @@
 extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 
 
-#endif /* _ASM_X86_IO_H */
+#endif /* ASM_X86__IO_H */

diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index e876d89..4f7d878 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
+#ifndef ASM_X86__IO_32_H
+#define ASM_X86__IO_32_H
 
 #include <linux/string.h>
 #include <linux/compiler.h>
@@ -281,4 +281,4 @@
 BUILDIO(w, w, short)
 BUILDIO(l, , int)
 
-#endif
+#endif /* ASM_X86__IO_32_H */

diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index 22995c5..64429e9 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_IO_H
-#define _ASM_IO_H
+#ifndef ASM_X86__IO_64_H
+#define ASM_X86__IO_64_H
 
 
 /*
@@ -235,7 +235,6 @@
 
 #define flush_write_buffers()
 
-extern int iommu_bio_merge;
 #define BIO_VMERGE_BOUNDARY iommu_bio_merge
 
 /*
@@ -245,4 +244,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__IO_64_H */

diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 14f82bb..be62847 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_IO_APIC_H
-#define __ASM_IO_APIC_H
+#ifndef ASM_X86__IO_APIC_H
+#define ASM_X86__IO_APIC_H
 
 #include <linux/types.h>
 #include <asm/mpspec.h>
@@ -189,4 +189,4 @@
 static inline void ioapic_init_mappings(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__IO_APIC_H */

diff --git a/include/asm-x86/ioctls.h b/include/asm-x86/ioctls.h
index c0c338b..3366035 100644
--- a/include/asm-x86/ioctls.h
+++ b/include/asm-x86/ioctls.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IOCTLS_H
-#define _ASM_X86_IOCTLS_H
+#ifndef ASM_X86__IOCTLS_H
+#define ASM_X86__IOCTLS_H
 
 #include <asm/ioctl.h>
 
@@ -85,4 +85,4 @@
 
 #define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
 
-#endif
+#endif /* ASM_X86__IOCTLS_H */

diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index 5f888cc..e86f441 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_IOMMU_H
-#define _ASM_X8664_IOMMU_H 1
+#ifndef ASM_X86__IOMMU_H
+#define ASM_X86__IOMMU_H
 
 extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
@@ -42,4 +42,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__IOMMU_H */

diff --git a/include/asm-x86/ipcbuf.h b/include/asm-x86/ipcbuf.h
index ee678fd..910304f 100644
--- a/include/asm-x86/ipcbuf.h
+++ b/include/asm-x86/ipcbuf.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_IPCBUF_H
-#define _ASM_X86_IPCBUF_H
+#ifndef ASM_X86__IPCBUF_H
+#define ASM_X86__IPCBUF_H
 
 /*
  * The ipc64_perm structure for x86 architecture.
@@ -25,4 +25,4 @@
 	unsigned long		__unused2;
 };
 
-#endif /* _ASM_X86_IPCBUF_H */
+#endif /* ASM_X86__IPCBUF_H */

diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index bb1c09f..c1b2267 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_IPI_H
-#define __ASM_IPI_H
+#ifndef ASM_X86__IPI_H
+#define ASM_X86__IPI_H
 
 /*
  * Copyright 2004 James Cleverdon, IBM.
@@ -129,4 +129,4 @@
 	local_irq_restore(flags);
 }
 
-#endif /* __ASM_IPI_H */
+#endif /* ASM_X86__IPI_H */

diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 1a29257..1e5f290 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_H
-#define _ASM_IRQ_H
+#ifndef ASM_X86__IRQ_H
+#define ASM_X86__IRQ_H
 /*
  *	(C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
  *
@@ -47,4 +47,4 @@
 /* Interrupt vector management */
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
 
-#endif /* _ASM_IRQ_H */
+#endif /* ASM_X86__IRQ_H */

diff --git a/include/asm-x86/irq_regs_32.h b/include/asm-x86/irq_regs_32.h
index 3368b20..316a3b2 100644
--- a/include/asm-x86/irq_regs_32.h
+++ b/include/asm-x86/irq_regs_32.h

@@ -4,8 +4,8 @@
  *
  * Jeremy Fitzhardinge <jeremy@goop.org>
  */
-#ifndef _ASM_I386_IRQ_REGS_H
-#define _ASM_I386_IRQ_REGS_H
+#ifndef ASM_X86__IRQ_REGS_32_H
+#define ASM_X86__IRQ_REGS_32_H
 
 #include <asm/percpu.h>
 
@@ -26,4 +26,4 @@
 	return old_regs;
 }
 
-#endif /* _ASM_I386_IRQ_REGS_H */
+#endif /* ASM_X86__IRQ_REGS_32_H */

diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index a48c7f2..c5d2d76 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_VECTORS_H
-#define _ASM_IRQ_VECTORS_H
+#ifndef ASM_X86__IRQ_VECTORS_H
+#define ASM_X86__IRQ_VECTORS_H
 
 #include <linux/threads.h>
 
@@ -179,4 +179,4 @@
 #define VIC_CPU_BOOT_ERRATA_CPI		(VIC_CPI_LEVEL0 + 8)
 
 
-#endif /* _ASM_IRQ_VECTORS_H */
+#endif /* ASM_X86__IRQ_VECTORS_H */

diff --git a/include/asm-x86/ist.h b/include/asm-x86/ist.h
index 6ec6cee..35a2fe9 100644
--- a/include/asm-x86/ist.h
+++ b/include/asm-x86/ist.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_IST_H
-#define _ASM_IST_H
+#ifndef ASM_X86__IST_H
+#define ASM_X86__IST_H
 
 /*
  * Include file for the interface to IST BIOS
@@ -31,4 +31,4 @@
 extern struct ist_info ist_info;
 
 #endif	/* __KERNEL__ */
-#endif	/* _ASM_IST_H */
+#endif /* ASM_X86__IST_H */

diff --git a/include/asm-x86/k8.h b/include/asm-x86/k8.h
index 452e2b6..2bbaf43 100644
--- a/include/asm-x86/k8.h
+++ b/include/asm-x86/k8.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_K8_H
-#define _ASM_K8_H 1
+#ifndef ASM_X86__K8_H
+#define ASM_X86__K8_H
 
 #include <linux/pci.h>
 
@@ -12,4 +12,4 @@
 extern void k8_flush_garts(void);
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
-#endif
+#endif /* ASM_X86__K8_H */

diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h
index 96651bb..5ec3ad3 100644
--- a/include/asm-x86/kdebug.h
+++ b/include/asm-x86/kdebug.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_KDEBUG_H
-#define _ASM_X86_KDEBUG_H
+#ifndef ASM_X86__KDEBUG_H
+#define ASM_X86__KDEBUG_H
 
 #include <linux/notifier.h>
 
@@ -35,4 +35,4 @@
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
-#endif
+#endif /* ASM_X86__KDEBUG_H */

diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h
index 4246ab7..ea09600 100644
--- a/include/asm-x86/kexec.h
+++ b/include/asm-x86/kexec.h

@@ -1,5 +1,5 @@
-#ifndef _KEXEC_H
-#define _KEXEC_H
+#ifndef ASM_X86__KEXEC_H
+#define ASM_X86__KEXEC_H
 
 #ifdef CONFIG_X86_32
 # define PA_CONTROL_PAGE	0
@@ -172,4 +172,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* _KEXEC_H */
+#endif /* ASM_X86__KEXEC_H */

diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h
index 94d63db..d283863 100644
--- a/include/asm-x86/kgdb.h
+++ b/include/asm-x86/kgdb.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_KGDB_H_
-#define _ASM_KGDB_H_
+#ifndef ASM_X86__KGDB_H
+#define ASM_X86__KGDB_H
 
 /*
  * Copyright (C) 2001-2004 Amit S. Kale
@@ -76,4 +76,4 @@
 #define BREAK_INSTR_SIZE	1
 #define CACHE_FLUSH_IS_SAFE	1
 
-#endif				/* _ASM_KGDB_H_ */
+#endif /* ASM_X86__KGDB_H */

diff --git a/include/asm-x86/kmap_types.h b/include/asm-x86/kmap_types.h
index 5f41741..89f4449 100644
--- a/include/asm-x86/kmap_types.h
+++ b/include/asm-x86/kmap_types.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_KMAP_TYPES_H
-#define _ASM_X86_KMAP_TYPES_H
+#ifndef ASM_X86__KMAP_TYPES_H
+#define ASM_X86__KMAP_TYPES_H
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM)
 # define D(n) __KM_FENCE_##n ,
@@ -26,4 +26,4 @@
 
 #undef D
 
-#endif
+#endif /* ASM_X86__KMAP_TYPES_H */

diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h
index 54980b0..bd84078 100644
--- a/include/asm-x86/kprobes.h
+++ b/include/asm-x86/kprobes.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_KPROBES_H
-#define _ASM_KPROBES_H
+#ifndef ASM_X86__KPROBES_H
+#define ASM_X86__KPROBES_H
 /*
  *  Kernel Probes (KProbes)
  *
@@ -94,4 +94,4 @@
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
-#endif				/* _ASM_KPROBES_H */
+#endif /* ASM_X86__KPROBES_H */

diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 6f18408..78e954d 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h

@@ -1,5 +1,5 @@
-#ifndef __LINUX_KVM_X86_H
-#define __LINUX_KVM_X86_H
+#ifndef ASM_X86__KVM_H
+#define ASM_X86__KVM_H
 
 /*
  * KVM x86 specific structures and definitions
@@ -230,4 +230,4 @@
 #define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
 #define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
 
-#endif
+#endif /* ASM_X86__KVM_H */

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index c2e34c2..6979454 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h

@@ -1,4 +1,4 @@
-#/*
+/*
  * Kernel-based Virtual Machine driver for Linux
  *
  * This header defines architecture specific interfaces, x86 version
@@ -8,8 +8,8 @@
  *
  */
 
-#ifndef ASM_KVM_HOST_H
-#define ASM_KVM_HOST_H
+#ifndef ASM_X86__KVM_HOST_H
+#define ASM_X86__KVM_HOST_H
 
 #include <linux/types.h>
 #include <linux/mm.h>
@@ -735,4 +735,4 @@
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 
-#endif
+#endif /* ASM_X86__KVM_HOST_H */

diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 76f3921..30054fd 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h

@@ -1,5 +1,5 @@
-#ifndef __X86_KVM_PARA_H
-#define __X86_KVM_PARA_H
+#ifndef ASM_X86__KVM_PARA_H
+#define ASM_X86__KVM_PARA_H
 
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
@@ -144,4 +144,4 @@
 
 #endif
 
-#endif
+#endif /* ASM_X86__KVM_PARA_H */

diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h
index 4e8c1e4..e2d9b03 100644
--- a/include/asm-x86/kvm_x86_emulate.h
+++ b/include/asm-x86/kvm_x86_emulate.h

@@ -8,8 +8,8 @@
  * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
  */
 
-#ifndef __X86_EMULATE_H__
-#define __X86_EMULATE_H__
+#ifndef ASM_X86__KVM_X86_EMULATE_H
+#define ASM_X86__KVM_X86_EMULATE_H
 
 struct x86_emulate_ctxt;
 
@@ -181,4 +181,4 @@
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
 		     struct x86_emulate_ops *ops);
 
-#endif				/* __X86_EMULATE_H__ */
+#endif /* ASM_X86__KVM_X86_EMULATE_H */

diff --git a/include/asm-x86/ldt.h b/include/asm-x86/ldt.h
index 20c5972..a522850 100644
--- a/include/asm-x86/ldt.h
+++ b/include/asm-x86/ldt.h

@@ -3,8 +3,8 @@
  *
  * Definitions of structures used with the modify_ldt system call.
  */
-#ifndef _ASM_X86_LDT_H
-#define _ASM_X86_LDT_H
+#ifndef ASM_X86__LDT_H
+#define ASM_X86__LDT_H
 
 /* Maximum number of LDT entries supported. */
 #define LDT_ENTRIES	8192
@@ -37,4 +37,4 @@
 #define MODIFY_LDT_CONTENTS_CODE	2
 
 #endif /* !__ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__LDT_H */

diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index be4a724..7505e94 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h

@@ -1,5 +1,5 @@
-#ifndef _X86_LGUEST_H
-#define _X86_LGUEST_H
+#ifndef ASM_X86__LGUEST_H
+#define ASM_X86__LGUEST_H
 
 #define GDT_ENTRY_LGUEST_CS	10
 #define GDT_ENTRY_LGUEST_DS	11
@@ -91,4 +91,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__LGUEST_H */

diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h
index a3241f2..8f034ba 100644
--- a/include/asm-x86/lguest_hcall.h
+++ b/include/asm-x86/lguest_hcall.h

@@ -1,6 +1,6 @@
 /* Architecture specific portion of the lguest hypercalls */
-#ifndef _X86_LGUEST_HCALL_H
-#define _X86_LGUEST_HCALL_H
+#ifndef ASM_X86__LGUEST_HCALL_H
+#define ASM_X86__LGUEST_HCALL_H
 
 #define LHCALL_FLUSH_ASYNC	0
 #define LHCALL_LGUEST_INIT	1
@@ -68,4 +68,4 @@
 };
 
 #endif /* !__ASSEMBLY__ */
-#endif	/* _I386_LGUEST_HCALL_H */
+#endif /* ASM_X86__LGUEST_HCALL_H */

diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h
index 64e444f..42d8b62 100644
--- a/include/asm-x86/linkage.h
+++ b/include/asm-x86/linkage.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
+#ifndef ASM_X86__LINKAGE_H
+#define ASM_X86__LINKAGE_H
 
 #undef notrace
 #define notrace __attribute__((no_instrument_function))
@@ -57,5 +57,5 @@
 #define __ALIGN_STR ".align 16,0x90"
 #endif
 
-#endif
+#endif /* ASM_X86__LINKAGE_H */
 

diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h
index 330a724..ae91994 100644
--- a/include/asm-x86/local.h
+++ b/include/asm-x86/local.h

@@ -1,5 +1,5 @@
-#ifndef _ARCH_LOCAL_H
-#define _ARCH_LOCAL_H
+#ifndef ASM_X86__LOCAL_H
+#define ASM_X86__LOCAL_H
 
 #include <linux/percpu.h>
 
@@ -232,4 +232,4 @@
 #define __cpu_local_add(i, l)	cpu_local_add((i), (l))
 #define __cpu_local_sub(i, l)	cpu_local_sub((i), (l))
 
-#endif /* _ARCH_LOCAL_H */
+#endif /* ASM_X86__LOCAL_H */

diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h
index c3b9dc6..05362d4 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/mach-bigsmp/mach_apic.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H
+#define ASM_X86__MACH_BIGSMP__MACH_APIC_H
 
 #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu))
 #define esr_disable (1)
@@ -141,4 +141,4 @@
 	return cpuid_apic >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */

diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h
index a58ab5a..811935d 100644
--- a/include/asm-x86/mach-bigsmp/mach_apicdef.h
+++ b/include/asm-x86/mach-bigsmp/mach_apicdef.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
+#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
 
 #define		APIC_ID_MASK		(0xFF<<24)
 
@@ -10,4 +10,4 @@
 
 #define		GET_APIC_ID(x)	get_apic_id(x)
 
-#endif
+#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */

diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/mach-bigsmp/mach_ipi.h
index 9404c53..b1b0f96 100644
--- a/include/asm-x86/mach-bigsmp/mach_ipi.h
+++ b/include/asm-x86/mach-bigsmp/mach_ipi.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H
+#define ASM_X86__MACH_BIGSMP__MACH_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector);
 
@@ -22,4 +22,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */

diff --git a/include/asm-x86/mach-default/apm.h b/include/asm-x86/mach-default/apm.h
index 989f34c..2aa61b5 100644
--- a/include/asm-x86/mach-default/apm.h
+++ b/include/asm-x86/mach-default/apm.h

@@ -3,8 +3,8 @@
  *  Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp>
  */
 
-#ifndef _ASM_APM_H
-#define _ASM_APM_H
+#ifndef ASM_X86__MACH_DEFAULT__APM_H
+#define ASM_X86__MACH_DEFAULT__APM_H
 
 #ifdef APM_ZERO_SEGS
 #	define APM_DO_ZERO_SEGS \
@@ -70,4 +70,4 @@
 	return error;
 }
 
-#endif /* _ASM_APM_H */
+#endif /* ASM_X86__MACH_DEFAULT__APM_H */

diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index f3226b9..b615f40 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_APIC_H
+#define ASM_X86__MACH_DEFAULT__MACH_APIC_H
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
@@ -138,4 +138,4 @@
 }
 
 #endif /* CONFIG_X86_LOCAL_APIC */
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */

diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h
index e4b29ba..936704f 100644
--- a/include/asm-x86/mach-default/mach_apicdef.h
+++ b/include/asm-x86/mach-default/mach_apicdef.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_APICDEF_H
+#define ASM_X86__MACH_DEFAULT__MACH_APICDEF_H
 
 #include <asm/apic.h>
 
@@ -21,4 +21,4 @@
 #define		GET_APIC_ID(x)	get_apic_id(x)
 #endif
 
-#endif
+#endif /* ASM_X86__MACH_DEFAULT__MACH_APICDEF_H */

diff --git a/include/asm-x86/mach-default/mach_ipi.h b/include/asm-x86/mach-default/mach_ipi.h
index be32336..674bc7e 100644
--- a/include/asm-x86/mach-default/mach_ipi.h
+++ b/include/asm-x86/mach-default/mach_ipi.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_IPI_H
+#define ASM_X86__MACH_DEFAULT__MACH_IPI_H
 
 /* Avoid include hell */
 #define NMI_VECTOR 0x02
@@ -61,4 +61,4 @@
 }
 #endif
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_IPI_H */

diff --git a/include/asm-x86/mach-default/mach_mpparse.h b/include/asm-x86/mach-default/mach_mpparse.h
index d141085..9c381f2 100644
--- a/include/asm-x86/mach-default/mach_mpparse.h
+++ b/include/asm-x86/mach-default/mach_mpparse.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H
+#define ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H
 
 static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, 
 		char *productid)
@@ -14,4 +14,4 @@
 }
 
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H */

diff --git a/include/asm-x86/mach-default/mach_mpspec.h b/include/asm-x86/mach-default/mach_mpspec.h
index 51c9a97..d77646f 100644
--- a/include/asm-x86/mach-default/mach_mpspec.h
+++ b/include/asm-x86/mach-default/mach_mpspec.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H
+#define ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H
 
 #define MAX_IRQ_SOURCES 256
 
@@ -9,4 +9,4 @@
 #define MAX_MP_BUSSES 32
 #endif
 
-#endif /* __ASM_MACH_MPSPEC_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H */

diff --git a/include/asm-x86/mach-default/mach_timer.h b/include/asm-x86/mach-default/mach_timer.h
index 4b76e53..990b158 100644
--- a/include/asm-x86/mach-default/mach_timer.h
+++ b/include/asm-x86/mach-default/mach_timer.h

@@ -10,8 +10,8 @@
  * directly because of the awkward 8-bit access mechanism of the 82C54
  * device.
  */
-#ifndef _MACH_TIMER_H
-#define _MACH_TIMER_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_TIMER_H
+#define ASM_X86__MACH_DEFAULT__MACH_TIMER_H
 
 #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
 #define CALIBRATE_LATCH	\
@@ -45,4 +45,4 @@
 	*count_p = count;
 }
 
-#endif /* !_MACH_TIMER_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_TIMER_H */

diff --git a/include/asm-x86/mach-default/mach_traps.h b/include/asm-x86/mach-default/mach_traps.h
index 2fe7705..de9ac3f 100644
--- a/include/asm-x86/mach-default/mach_traps.h
+++ b/include/asm-x86/mach-default/mach_traps.h

@@ -2,8 +2,8 @@
  *  Machine specific NMI handling for generic.
  *  Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp>
  */
-#ifndef _MACH_TRAPS_H
-#define _MACH_TRAPS_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_TRAPS_H
+#define ASM_X86__MACH_DEFAULT__MACH_TRAPS_H
 
 #include <asm/mc146818rtc.h>
 
@@ -36,4 +36,4 @@
 		unlock_cmos();
 }
 
-#endif /* !_MACH_TRAPS_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_TRAPS_H */

diff --git a/include/asm-x86/mach-default/mach_wakecpu.h b/include/asm-x86/mach-default/mach_wakecpu.h
index 3ebb178..361b810 100644
--- a/include/asm-x86/mach-default/mach_wakecpu.h
+++ b/include/asm-x86/mach-default/mach_wakecpu.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H
+#define ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H
 
 /* 
  * This file copes with machines that wakeup secondary CPUs by the
@@ -39,4 +39,4 @@
  #define inquire_remote_apic(apicid) {}
 #endif
 
-#endif /* __ASM_MACH_WAKECPU_H */
+#endif /* ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H */

diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h
index 0a3fdf9..c1f6f68 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/mach-es7000/mach_apic.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H
+#define ASM_X86__MACH_ES7000__MACH_APIC_H
 
 #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
 #define esr_disable (1)
@@ -191,4 +191,4 @@
 	return cpuid_apic >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */

diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h
index a58ab5a..a07e567 100644
--- a/include/asm-x86/mach-es7000/mach_apicdef.h
+++ b/include/asm-x86/mach-es7000/mach_apicdef.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H
+#define ASM_X86__MACH_ES7000__MACH_APICDEF_H
 
 #define		APIC_ID_MASK		(0xFF<<24)
 
@@ -10,4 +10,4 @@
 
 #define		GET_APIC_ID(x)	get_apic_id(x)
 
-#endif
+#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */

diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/mach-es7000/mach_ipi.h
index 5e61bd2..3a21240 100644
--- a/include/asm-x86/mach-es7000/mach_ipi.h
+++ b/include/asm-x86/mach-es7000/mach_ipi.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H
+#define ASM_X86__MACH_ES7000__MACH_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector);
 
@@ -21,4 +21,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */

diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/mach-es7000/mach_mpparse.h
index ef26d35..befde24 100644
--- a/include/asm-x86/mach-es7000/mach_mpparse.h
+++ b/include/asm-x86/mach-es7000/mach_mpparse.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H
+#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H
 
 #include <linux/acpi.h>
 
@@ -26,4 +26,4 @@
 }
 #endif
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */

diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/mach-es7000/mach_wakecpu.h
index 84ff583..97c776c 100644
--- a/include/asm-x86/mach-es7000/mach_wakecpu.h
+++ b/include/asm-x86/mach-es7000/mach_wakecpu.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H
+#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H
 
 /* 
  * This file copes with machines that wakeup secondary CPUs by the
@@ -56,4 +56,4 @@
  #define inquire_remote_apic(apicid) {}
 #endif
 
-#endif /* __ASM_MACH_WAKECPU_H */
+#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */

diff --git a/include/asm-x86/mach-generic/gpio.h b/include/asm-x86/mach-generic/gpio.h
index 5305dcb..6ce0f77 100644
--- a/include/asm-x86/mach-generic/gpio.h
+++ b/include/asm-x86/mach-generic/gpio.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_GENERIC_GPIO_H
-#define __ASM_MACH_GENERIC_GPIO_H
+#ifndef ASM_X86__MACH_GENERIC__GPIO_H
+#define ASM_X86__MACH_GENERIC__GPIO_H
 
 int gpio_request(unsigned gpio, const char *label);
 void gpio_free(unsigned gpio);
@@ -12,4 +12,4 @@
 
 #include <asm-generic/gpio.h>           /* cansleep wrappers */
 
-#endif /* __ASM_MACH_GENERIC_GPIO_H */
+#endif /* ASM_X86__MACH_GENERIC__GPIO_H */

diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h
index 890ce3f..f7870e1 100644
--- a/include/asm-x86/mach-generic/irq_vectors_limits.h
+++ b/include/asm-x86/mach-generic/irq_vectors_limits.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
+#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
+#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H
 
 /*
  * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
@@ -11,4 +11,4 @@
 #define NR_IRQS	224
 #define NR_IRQ_VECTORS	1024
 
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
+#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */

diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h
index 6eff343..5d010c6 100644
--- a/include/asm-x86/mach-generic/mach_apic.h
+++ b/include/asm-x86/mach-generic/mach_apic.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_GENERIC__MACH_APIC_H
+#define ASM_X86__MACH_GENERIC__MACH_APIC_H
 
 #include <asm/genapic.h>
 
@@ -29,4 +29,4 @@
 
 extern void generic_bigsmp_probe(void);
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_GENERIC__MACH_APIC_H */

diff --git a/include/asm-x86/mach-generic/mach_apicdef.h b/include/asm-x86/mach-generic/mach_apicdef.h
index 28ed989..1657f38 100644
--- a/include/asm-x86/mach-generic/mach_apicdef.h
+++ b/include/asm-x86/mach-generic/mach_apicdef.h

@@ -1,5 +1,5 @@
-#ifndef _GENAPIC_MACH_APICDEF_H
-#define _GENAPIC_MACH_APICDEF_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_APICDEF_H
+#define ASM_X86__MACH_GENERIC__MACH_APICDEF_H
 
 #ifndef APIC_DEFINITION
 #include <asm/genapic.h>
@@ -8,4 +8,4 @@
 #define APIC_ID_MASK (genapic->apic_id_mask)
 #endif
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_APICDEF_H */

diff --git a/include/asm-x86/mach-generic/mach_ipi.h b/include/asm-x86/mach-generic/mach_ipi.h
index 441b0fe..f67433d 100644
--- a/include/asm-x86/mach-generic/mach_ipi.h
+++ b/include/asm-x86/mach-generic/mach_ipi.h

@@ -1,5 +1,5 @@
-#ifndef _MACH_IPI_H
-#define _MACH_IPI_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_IPI_H
+#define ASM_X86__MACH_GENERIC__MACH_IPI_H
 
 #include <asm/genapic.h>
 
@@ -7,4 +7,4 @@
 #define send_IPI_allbutself (genapic->send_IPI_allbutself)
 #define send_IPI_all (genapic->send_IPI_all)
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_IPI_H */

diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h
index 586cadb..3115564 100644
--- a/include/asm-x86/mach-generic/mach_mpparse.h
+++ b/include/asm-x86/mach-generic/mach_mpparse.h

@@ -1,5 +1,5 @@
-#ifndef _MACH_MPPARSE_H
-#define _MACH_MPPARSE_H 1
+#ifndef ASM_X86__MACH_GENERIC__MACH_MPPARSE_H
+#define ASM_X86__MACH_GENERIC__MACH_MPPARSE_H
 
 
 extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
@@ -7,4 +7,4 @@
 
 extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
 
-#endif
+#endif /* ASM_X86__MACH_GENERIC__MACH_MPPARSE_H */

diff --git a/include/asm-x86/mach-generic/mach_mpspec.h b/include/asm-x86/mach-generic/mach_mpspec.h
index c83c120..6061b15 100644
--- a/include/asm-x86/mach-generic/mach_mpspec.h
+++ b/include/asm-x86/mach-generic/mach_mpspec.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPSPEC_H
-#define __ASM_MACH_MPSPEC_H
+#ifndef ASM_X86__MACH_GENERIC__MACH_MPSPEC_H
+#define ASM_X86__MACH_GENERIC__MACH_MPSPEC_H
 
 #define MAX_IRQ_SOURCES 256
 
@@ -9,4 +9,4 @@
 
 extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
 				char *productid);
-#endif /* __ASM_MACH_MPSPEC_H */
+#endif /* ASM_X86__MACH_GENERIC__MACH_MPSPEC_H */

diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h
index d802465..7a0d39e 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/mach-numaq/mach_apic.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H
+#define ASM_X86__MACH_NUMAQ__MACH_APIC_H
 
 #include <asm/io.h>
 #include <linux/mmzone.h>
@@ -135,4 +135,4 @@
 	return cpuid_apic >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */

diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/mach-numaq/mach_apicdef.h
index bf439d0..f870ec5 100644
--- a/include/asm-x86/mach-numaq/mach_apicdef.h
+++ b/include/asm-x86/mach-numaq/mach_apicdef.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H
+#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H
 
 
 #define APIC_ID_MASK (0xF<<24)
@@ -11,4 +11,4 @@
 
 #define         GET_APIC_ID(x)  get_apic_id(x)
 
-#endif
+#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */

diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/mach-numaq/mach_ipi.h
index c604448..1e83582 100644
--- a/include/asm-x86/mach-numaq/mach_ipi.h
+++ b/include/asm-x86/mach-numaq/mach_ipi.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H
+#define ASM_X86__MACH_NUMAQ__MACH_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t, int vector);
 
@@ -22,4 +22,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */

diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h
index 626aef6..74ade18 100644
--- a/include/asm-x86/mach-numaq/mach_mpparse.h
+++ b/include/asm-x86/mach-numaq/mach_mpparse.h

@@ -1,7 +1,7 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
+#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
 
 extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
 				char *productid);
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */

diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/mach-numaq/mach_wakecpu.h
index 0053004..0db8cea 100644
--- a/include/asm-x86/mach-numaq/mach_wakecpu.h
+++ b/include/asm-x86/mach-numaq/mach_wakecpu.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_WAKECPU_H
-#define __ASM_MACH_WAKECPU_H
+#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H
+#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H
 
 /* This file copes with machines that wakeup secondary CPUs by NMIs */
 
@@ -40,4 +40,4 @@
 
 #define inquire_remote_apic(apicid) {}
 
-#endif /* __ASM_MACH_WAKECPU_H */
+#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */

diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h
index acce0b7..94b6cdf 100644
--- a/include/asm-x86/mach-rdc321x/gpio.h
+++ b/include/asm-x86/mach-rdc321x/gpio.h

@@ -1,5 +1,7 @@
-#ifndef _RDC321X_GPIO_H
-#define _RDC321X_GPIO_H
+#ifndef ASM_X86__MACH_RDC321X__GPIO_H
+#define ASM_X86__MACH_RDC321X__GPIO_H
+
+#include <linux/kernel.h>
 
 extern int rdc_gpio_get_value(unsigned gpio);
 extern void rdc_gpio_set_value(unsigned gpio, int value);
@@ -18,6 +20,7 @@
 
 static inline void gpio_free(unsigned gpio)
 {
+	might_sleep();
 	rdc_gpio_free(gpio);
 }
 
@@ -54,4 +57,4 @@
 /* For cansleep */
 #include <asm-generic/gpio.h>
 
-#endif /* _RDC321X_GPIO_H_ */
+#endif /* ASM_X86__MACH_RDC321X__GPIO_H */

diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/mach-summit/irq_vectors_limits.h
index 890ce3f..22f376a 100644
--- a/include/asm-x86/mach-summit/irq_vectors_limits.h
+++ b/include/asm-x86/mach-summit/irq_vectors_limits.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
+#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H
+#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H
 
 /*
  * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
@@ -11,4 +11,4 @@
 #define NR_IRQS	224
 #define NR_IRQ_VECTORS	1024
 
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
+#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */

diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h
index c47e2ab..7a66758 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/mach-summit/mach_apic.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H
+#define ASM_X86__MACH_SUMMIT__MACH_APIC_H
 
 #include <asm/smp.h>
 
@@ -182,4 +182,4 @@
 	return hard_smp_processor_id() >> index_msb;
 }
 
-#endif /* __ASM_MACH_APIC_H */
+#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */

diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h
index a58ab5a..d4bc859 100644
--- a/include/asm-x86/mach-summit/mach_apicdef.h
+++ b/include/asm-x86/mach-summit/mach_apicdef.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
+#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
 
 #define		APIC_ID_MASK		(0xFF<<24)
 
@@ -10,4 +10,4 @@
 
 #define		GET_APIC_ID(x)	get_apic_id(x)
 
-#endif
+#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */

diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/mach-summit/mach_ipi.h
index 9404c53..a3b31c5 100644
--- a/include/asm-x86/mach-summit/mach_ipi.h
+++ b/include/asm-x86/mach-summit/mach_ipi.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_IPI_H
-#define __ASM_MACH_IPI_H
+#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H
+#define ASM_X86__MACH_SUMMIT__MACH_IPI_H
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector);
 
@@ -22,4 +22,4 @@
 	send_IPI_mask(cpu_online_map, vector);
 }
 
-#endif /* __ASM_MACH_IPI_H */
+#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */

diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/mach-summit/mach_mpparse.h
index fdf5917..92396f2 100644
--- a/include/asm-x86/mach-summit/mach_mpparse.h
+++ b/include/asm-x86/mach-summit/mach_mpparse.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MACH_MPPARSE_H
-#define __ASM_MACH_MPPARSE_H
+#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H
+#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H
 
 #include <mach_apic.h>
 #include <asm/tsc.h>
@@ -107,4 +107,4 @@
 		rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
 }
 
-#endif /* __ASM_MACH_MPPARSE_H */
+#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */

diff --git a/include/asm-x86/math_emu.h b/include/asm-x86/math_emu.h
index 9bf4ae9..5768d8e 100644
--- a/include/asm-x86/math_emu.h
+++ b/include/asm-x86/math_emu.h

@@ -1,5 +1,5 @@
-#ifndef _I386_MATH_EMU_H
-#define _I386_MATH_EMU_H
+#ifndef ASM_X86__MATH_EMU_H
+#define ASM_X86__MATH_EMU_H
 
 /* This structure matches the layout of the data saved to the stack
    following a device-not-present interrupt, part of it saved
@@ -28,4 +28,4 @@
 	long ___vm86_fs;
 	long ___vm86_gs;
 };
-#endif
+#endif /* ASM_X86__MATH_EMU_H */

diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h
index daf1ccd..a995f33 100644
--- a/include/asm-x86/mc146818rtc.h
+++ b/include/asm-x86/mc146818rtc.h

@@ -1,8 +1,8 @@
 /*
  * Machine dependent access functions for RTC registers.
  */
-#ifndef _ASM_MC146818RTC_H
-#define _ASM_MC146818RTC_H
+#ifndef ASM_X86__MC146818RTC_H
+#define ASM_X86__MC146818RTC_H
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -101,4 +101,4 @@
 
 #define RTC_IRQ 8
 
-#endif /* _ASM_MC146818RTC_H */
+#endif /* ASM_X86__MC146818RTC_H */

diff --git a/include/asm-x86/mca.h b/include/asm-x86/mca.h
index 09adf2e..60d1ed2 100644
--- a/include/asm-x86/mca.h
+++ b/include/asm-x86/mca.h

@@ -1,8 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8 -*- */
 
 /* Platform specific MCA defines */
-#ifndef _ASM_MCA_H
-#define _ASM_MCA_H
+#ifndef ASM_X86__MCA_H
+#define ASM_X86__MCA_H
 
 /* Maximal number of MCA slots - actually, some machines have less, but
  * they all have sufficient number of POS registers to cover 8.
@@ -40,4 +40,4 @@
  */
 #define MCA_NUMADAPTERS (MCA_MAX_SLOT_NR+3)
 
-#endif
+#endif /* ASM_X86__MCA_H */

diff --git a/include/asm-x86/mca_dma.h b/include/asm-x86/mca_dma.h
index c3dca6e..49f22be 100644
--- a/include/asm-x86/mca_dma.h
+++ b/include/asm-x86/mca_dma.h

@@ -1,5 +1,5 @@
-#ifndef MCA_DMA_H
-#define MCA_DMA_H
+#ifndef ASM_X86__MCA_DMA_H
+#define ASM_X86__MCA_DMA_H
 
 #include <asm/io.h>
 #include <linux/ioport.h>
@@ -198,4 +198,4 @@
 	outb(mode, MCA_DMA_REG_EXE);
 }
 
-#endif /* MCA_DMA_H */
+#endif /* ASM_X86__MCA_DMA_H */

diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h
index 531eaa5..036133e 100644
--- a/include/asm-x86/mce.h
+++ b/include/asm-x86/mce.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MCE_H
-#define _ASM_X86_MCE_H
+#ifndef ASM_X86__MCE_H
+#define ASM_X86__MCE_H
 
 #ifdef __x86_64__
 
@@ -127,4 +127,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__MCE_H */

diff --git a/include/asm-x86/mman.h b/include/asm-x86/mman.h
index 90bc410..4ef28e6 100644
--- a/include/asm-x86/mman.h
+++ b/include/asm-x86/mman.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MMAN_H
-#define _ASM_X86_MMAN_H
+#ifndef ASM_X86__MMAN_H
+#define ASM_X86__MMAN_H
 
 #include <asm-generic/mman.h>
 
@@ -17,4 +17,4 @@
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
 
-#endif /* _ASM_X86_MMAN_H */
+#endif /* ASM_X86__MMAN_H */

diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
index e293ab8..fb79b1c 100644
--- a/include/asm-x86/mmconfig.h
+++ b/include/asm-x86/mmconfig.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_MMCONFIG_H
-#define _ASM_MMCONFIG_H
+#ifndef ASM_X86__MMCONFIG_H
+#define ASM_X86__MMCONFIG_H
 
 #ifdef CONFIG_PCI_MMCONFIG
 extern void __cpuinit fam10h_check_enable_mmcfg(void);
@@ -9,4 +9,4 @@
 static inline void check_enable_amd_mmconf_dmi(void) { }
 #endif
 
-#endif
+#endif /* ASM_X86__MMCONFIG_H */

diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h
index 00e8867..9d5aff1 100644
--- a/include/asm-x86/mmu.h
+++ b/include/asm-x86/mmu.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MMU_H
-#define _ASM_X86_MMU_H
+#ifndef ASM_X86__MMU_H
+#define ASM_X86__MMU_H
 
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
@@ -7,14 +7,9 @@
 /*
  * The x86 doesn't have a mmu context, but
  * we put the segment information here.
- *
- * cpu_vm_mask is used to optimize ldt flushing.
  */
 typedef struct {
 	void *ldt;
-#ifdef CONFIG_X86_64
-	rwlock_t ldtlock;
-#endif
 	int size;
 	struct mutex lock;
 	void *vdso;
@@ -28,4 +23,4 @@
 }
 #endif
 
-#endif /* _ASM_X86_MMU_H */
+#endif /* ASM_X86__MMU_H */

diff --git a/include/asm-x86/mmu_context.h b/include/asm-x86/mmu_context.h
index fac5701..8ec940b 100644
--- a/include/asm-x86/mmu_context.h
+++ b/include/asm-x86/mmu_context.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_MMU_CONTEXT_H
-#define __ASM_X86_MMU_CONTEXT_H
+#ifndef ASM_X86__MMU_CONTEXT_H
+#define ASM_X86__MMU_CONTEXT_H
 
 #include <asm/desc.h>
 #include <asm/atomic.h>
@@ -34,4 +34,4 @@
 } while (0);
 
 
-#endif /* __ASM_X86_MMU_CONTEXT_H */
+#endif /* ASM_X86__MMU_CONTEXT_H */

diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h
index 824fc57..cce6f6e 100644
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h

@@ -1,5 +1,5 @@
-#ifndef __I386_SCHED_H
-#define __I386_SCHED_H
+#ifndef ASM_X86__MMU_CONTEXT_32_H
+#define ASM_X86__MMU_CONTEXT_32_H
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
@@ -53,4 +53,4 @@
 #define deactivate_mm(tsk, mm)			\
 	asm("movl %0,%%gs": :"r" (0));
 
-#endif
+#endif /* ASM_X86__MMU_CONTEXT_32_H */

diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index c700063..2675867 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h

@@ -1,5 +1,5 @@
-#ifndef __X86_64_MMU_CONTEXT_H
-#define __X86_64_MMU_CONTEXT_H
+#ifndef ASM_X86__MMU_CONTEXT_64_H
+#define ASM_X86__MMU_CONTEXT_64_H
 
 #include <asm/pda.h>
 
@@ -51,4 +51,4 @@
 	asm volatile("movl %0,%%fs"::"r"(0));	\
 } while (0)
 
-#endif
+#endif /* ASM_X86__MMU_CONTEXT_64_H */

diff --git a/include/asm-x86/mmx.h b/include/asm-x86/mmx.h
index 9408812..2e7299b 100644
--- a/include/asm-x86/mmx.h
+++ b/include/asm-x86/mmx.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_MMX_H
-#define _ASM_MMX_H
+#ifndef ASM_X86__MMX_H
+#define ASM_X86__MMX_H
 
 /*
  *	MMX 3Dnow! helper operations
@@ -11,4 +11,4 @@
 extern void mmx_clear_page(void *page);
 extern void mmx_copy_page(void *to, void *from);
 
-#endif
+#endif /* ASM_X86__MMX_H */

diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index 5862e64..121b65d 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h

@@ -3,8 +3,8 @@
  *
  */
 
-#ifndef _ASM_MMZONE_H_
-#define _ASM_MMZONE_H_
+#ifndef ASM_X86__MMZONE_32_H
+#define ASM_X86__MMZONE_32_H
 
 #include <asm/smp.h>
 
@@ -131,4 +131,4 @@
 })
 #endif /* CONFIG_NEED_MULTIPLE_NODES */
 
-#endif /* _ASM_MMZONE_H_ */
+#endif /* ASM_X86__MMZONE_32_H */

diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index 594bd0d..626b03a 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h

@@ -1,8 +1,8 @@
 /* K8 NUMA support */
 /* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
 /* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */
-#ifndef _ASM_X86_64_MMZONE_H
-#define _ASM_X86_64_MMZONE_H 1
+#ifndef ASM_X86__MMZONE_64_H
+#define ASM_X86__MMZONE_64_H
 
 
 #ifdef CONFIG_NUMA
@@ -49,4 +49,4 @@
 #endif
 
 #endif
-#endif
+#endif /* ASM_X86__MMZONE_64_H */

diff --git a/include/asm-x86/module.h b/include/asm-x86/module.h
index bfedb24..48dc3e0 100644
--- a/include/asm-x86/module.h
+++ b/include/asm-x86/module.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_MODULE_H
-#define _ASM_MODULE_H
+#ifndef ASM_X86__MODULE_H
+#define ASM_X86__MODULE_H
 
 /* x86_32/64 are simple */
 struct mod_arch_specific {};
@@ -79,4 +79,4 @@
 # define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
 #endif
 
-#endif /* _ASM_MODULE_H */
+#endif /* ASM_X86__MODULE_H */

diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index b6995e5..118da36 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h

@@ -1,5 +1,5 @@
-#ifndef _AM_X86_MPSPEC_H
-#define _AM_X86_MPSPEC_H
+#ifndef ASM_X86__MPSPEC_H
+#define ASM_X86__MPSPEC_H
 
 #include <linux/init.h>
 
@@ -141,4 +141,4 @@
 
 extern physid_mask_t phys_cpu_present_map;
 
-#endif
+#endif /* ASM_X86__MPSPEC_H */

diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h
index 38d1e73..79166b0 100644
--- a/include/asm-x86/mpspec_def.h
+++ b/include/asm-x86/mpspec_def.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MPSPEC_DEF_H
-#define __ASM_MPSPEC_DEF_H
+#ifndef ASM_X86__MPSPEC_DEF_H
+#define ASM_X86__MPSPEC_DEF_H
 
 /*
  * Structure definitions for SMP machines following the
@@ -177,4 +177,4 @@
 	MP_BUS_PCI,
 	MP_BUS_MCA,
 };
-#endif
+#endif /* ASM_X86__MPSPEC_DEF_H */

diff --git a/include/asm-x86/msgbuf.h b/include/asm-x86/msgbuf.h
index 7e4e948..1b538c9 100644
--- a/include/asm-x86/msgbuf.h
+++ b/include/asm-x86/msgbuf.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_MSGBUF_H
-#define _ASM_X86_MSGBUF_H
+#ifndef ASM_X86__MSGBUF_H
+#define ASM_X86__MSGBUF_H
 
 /*
  * The msqid64_ds structure for i386 architecture.
@@ -36,4 +36,4 @@
 	unsigned long  __unused5;
 };
 
-#endif /* _ASM_X86_MSGBUF_H */
+#endif /* ASM_X86__MSGBUF_H */

diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 296f29c..3139666 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h

@@ -1,5 +1,5 @@
-#ifndef ASM_MSIDEF_H
-#define ASM_MSIDEF_H
+#ifndef ASM_X86__MSIDEF_H
+#define ASM_X86__MSIDEF_H
 
 /*
  * Constants for Intel APIC based MSI messages.
@@ -48,4 +48,4 @@
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
 					 MSI_ADDR_DEST_ID_MASK)
 
-#endif /* ASM_MSIDEF_H */
+#endif /* ASM_X86__MSIDEF_H */

diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 44bce77..3052f05 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_MSR_INDEX_H
-#define __ASM_MSR_INDEX_H
+#ifndef ASM_X86__MSR_INDEX_H
+#define ASM_X86__MSR_INDEX_H
 
 /* CPU model specific register (MSR) numbers */
 
@@ -310,4 +310,4 @@
 /* Geode defined MSRs */
 #define MSR_GEODE_BUSCONT_CONF0		0x00001900
 
-#endif /* __ASM_MSR_INDEX_H */
+#endif /* ASM_X86__MSR_INDEX_H */

diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 2362cfd..530af1f 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_MSR_H_
-#define __ASM_X86_MSR_H_
+#ifndef ASM_X86__MSR_H
+#define ASM_X86__MSR_H
 
 #include <asm/msr-index.h>
 
@@ -63,6 +63,22 @@
 	return EAX_EDX_VAL(val, low, high);
 }
 
+static inline unsigned long long native_read_msr_amd_safe(unsigned int msr,
+						      int *err)
+{
+	DECLARE_ARGS(val, low, high);
+
+	asm volatile("2: rdmsr ; xor %0,%0\n"
+		     "1:\n\t"
+		     ".section .fixup,\"ax\"\n\t"
+		     "3:  mov %3,%0 ; jmp 1b\n\t"
+		     ".previous\n\t"
+		     _ASM_EXTABLE(2b, 3b)
+		     : "=r" (*err), EAX_EDX_RET(val, low, high)
+		     : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT));
+	return EAX_EDX_VAL(val, low, high);
+}
+
 static inline void native_write_msr(unsigned int msr,
 				    unsigned low, unsigned high)
 {
@@ -158,6 +174,13 @@
 	*p = native_read_msr_safe(msr, &err);
 	return err;
 }
+static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+{
+	int err;
+
+	*p = native_read_msr_amd_safe(msr, &err);
+	return err;
+}
 
 #define rdtscl(low)						\
 	((low) = (u32)native_read_tsc())
@@ -221,4 +244,4 @@
 #endif /* __KERNEL__ */
 
 
-#endif
+#endif /* ASM_X86__MSR_H */

diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h
index a69a01a..23a7f83 100644
--- a/include/asm-x86/mtrr.h
+++ b/include/asm-x86/mtrr.h

@@ -20,8 +20,8 @@
     The postal address is:
       Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
 */
-#ifndef _ASM_X86_MTRR_H
-#define _ASM_X86_MTRR_H
+#ifndef ASM_X86__MTRR_H
+#define ASM_X86__MTRR_H
 
 #include <linux/ioctl.h>
 #include <linux/errno.h>
@@ -170,4 +170,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif  /*  _ASM_X86_MTRR_H  */
+#endif /* ASM_X86__MTRR_H */

diff --git a/include/asm-x86/mutex_32.h b/include/asm-x86/mutex_32.h
index 73e928e..25c16d8 100644
--- a/include/asm-x86/mutex_32.h
+++ b/include/asm-x86/mutex_32.h

@@ -6,8 +6,8 @@
  *
  *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
+#ifndef ASM_X86__MUTEX_32_H
+#define ASM_X86__MUTEX_32_H
 
 #include <asm/alternative.h>
 
@@ -122,4 +122,4 @@
 #endif
 }
 
-#endif
+#endif /* ASM_X86__MUTEX_32_H */

diff --git a/include/asm-x86/mutex_64.h b/include/asm-x86/mutex_64.h
index f3fae9b..918ba21 100644
--- a/include/asm-x86/mutex_64.h
+++ b/include/asm-x86/mutex_64.h

@@ -6,8 +6,8 @@
  *
  *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
+#ifndef ASM_X86__MUTEX_64_H
+#define ASM_X86__MUTEX_64_H
 
 /**
  * __mutex_fastpath_lock - decrement and call function if negative
@@ -97,4 +97,4 @@
 		return 0;
 }
 
-#endif
+#endif /* ASM_X86__MUTEX_64_H */

diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 21f8d02..d5e715f 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_NMI_H_
-#define _ASM_X86_NMI_H_
+#ifndef ASM_X86__NMI_H
+#define ASM_X86__NMI_H
 
 #include <linux/pm.h>
 #include <asm/irq.h>
@@ -34,6 +34,7 @@
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
 extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
+extern void cpu_nmi_set_wd_enabled(void);
 
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
@@ -81,4 +82,4 @@
 void stop_nmi(void);
 void restart_nmi(void);
 
-#endif
+#endif /* ASM_X86__NMI_H */

diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
index ad0bedd..ae74272 100644
--- a/include/asm-x86/nops.h
+++ b/include/asm-x86/nops.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_NOPS_H
-#define _ASM_NOPS_H 1
+#ifndef ASM_X86__NOPS_H
+#define ASM_X86__NOPS_H
 
 /* Define nops for use with alternative() */
 
@@ -115,4 +115,4 @@
 
 #define ASM_NOP_MAX 8
 
-#endif
+#endif /* ASM_X86__NOPS_H */

diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index 220d7b7..44cb078 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_32_NUMA_H
-#define _ASM_X86_32_NUMA_H 1
+#ifndef ASM_X86__NUMA_32_H
+#define ASM_X86__NUMA_32_H
 
 extern int pxm_to_nid(int pxm);
 extern void numa_remove_cpu(int cpu);
@@ -8,4 +8,4 @@
 extern void set_highmem_pages_init(void);
 #endif
 
-#endif /* _ASM_X86_32_NUMA_H */
+#endif /* ASM_X86__NUMA_32_H */

diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 3830094..15c9903 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_NUMA_H
-#define _ASM_X8664_NUMA_H 1
+#ifndef ASM_X86__NUMA_64_H
+#define ASM_X86__NUMA_64_H
 
 #include <linux/nodemask.h>
 #include <asm/apicdef.h>
@@ -40,4 +40,4 @@
 static inline void numa_remove_cpu(int cpu)		{ }
 #endif
 
-#endif
+#endif /* ASM_X86__NUMA_64_H */

diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h
index 34b92d5..124bf7d 100644
--- a/include/asm-x86/numaq.h
+++ b/include/asm-x86/numaq.h

@@ -23,8 +23,8 @@
  * Send feedback to <gone@us.ibm.com>
  */
 
-#ifndef NUMAQ_H
-#define NUMAQ_H
+#ifndef ASM_X86__NUMAQ_H
+#define ASM_X86__NUMAQ_H
 
 #ifdef CONFIG_X86_NUMAQ
 
@@ -165,5 +165,5 @@
 	return 0;
 }
 #endif /* CONFIG_X86_NUMAQ */
-#endif /* NUMAQ_H */
+#endif /* ASM_X86__NUMAQ_H */
 

diff --git a/include/asm-x86/olpc.h b/include/asm-x86/olpc.h
index 97d4713..d7328b1 100644
--- a/include/asm-x86/olpc.h
+++ b/include/asm-x86/olpc.h

@@ -1,7 +1,7 @@
 /* OLPC machine specific definitions */
 
-#ifndef ASM_OLPC_H_
-#define ASM_OLPC_H_
+#ifndef ASM_X86__OLPC_H
+#define ASM_X86__OLPC_H
 
 #include <asm/geode.h>
 
@@ -129,4 +129,4 @@
 #define OLPC_GPIO_LID		geode_gpio(26)
 #define OLPC_GPIO_ECSCI		geode_gpio(27)
 
-#endif
+#endif /* ASM_X86__OLPC_H */

diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 4998211..79544e6 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PAGE_H
-#define _ASM_X86_PAGE_H
+#ifndef ASM_X86__PAGE_H
+#define ASM_X86__PAGE_H
 
 #include <linux/const.h>
 
@@ -199,4 +199,4 @@
 #define __HAVE_ARCH_GATE_AREA 1
 
 #endif	/* __KERNEL__ */
-#endif	/* _ASM_X86_PAGE_H */
+#endif /* ASM_X86__PAGE_H */

diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index ab85287..72f7305 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PAGE_32_H
-#define _ASM_X86_PAGE_32_H
+#ifndef ASM_X86__PAGE_32_H
+#define ASM_X86__PAGE_32_H
 
 /*
  * This handles the memory map.
@@ -89,13 +89,11 @@
 extern unsigned int __VMALLOC_RESERVE;
 extern int sysctl_legacy_va_layout;
 
-#define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM			(-__PAGE_OFFSET - __VMALLOC_RESERVE)
-
 extern void find_low_pfn_range(void);
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 extern void initmem_init(unsigned long, unsigned long);
+extern void free_initmem(void);
 extern void setup_bootmem_allocator(void);
 
 
@@ -126,4 +124,4 @@
 #endif	/* CONFIG_X86_3DNOW */
 #endif	/* !__ASSEMBLY__ */
 
-#endif /* _ASM_X86_PAGE_32_H */
+#endif /* ASM_X86__PAGE_32_H */

diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index c6916c8..5e64acf 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h

@@ -1,5 +1,5 @@
-#ifndef _X86_64_PAGE_H
-#define _X86_64_PAGE_H
+#ifndef ASM_X86__PAGE_64_H
+#define ASM_X86__PAGE_64_H
 
 #define PAGETABLE_LEVELS	4
 
@@ -91,6 +91,7 @@
 					 unsigned long end);
 
 extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
 
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
@@ -102,4 +103,4 @@
 #endif
 
 
-#endif /* _X86_64_PAGE_H */
+#endif /* ASM_X86__PAGE_64_H */

diff --git a/include/asm-x86/param.h b/include/asm-x86/param.h
index 6f0d042..0009cfb 100644
--- a/include/asm-x86/param.h
+++ b/include/asm-x86/param.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PARAM_H
-#define _ASM_X86_PARAM_H
+#ifndef ASM_X86__PARAM_H
+#define ASM_X86__PARAM_H
 
 #ifdef __KERNEL__
 # define HZ		CONFIG_HZ	/* Internal kernel timer frequency */
@@ -19,4 +19,4 @@
 
 #define MAXHOSTNAMELEN	64	/* max length of hostname */
 
-#endif /* _ASM_X86_PARAM_H */
+#endif /* ASM_X86__PARAM_H */

diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index fbbde93..891971f 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_PARAVIRT_H
-#define __ASM_PARAVIRT_H
+#ifndef ASM_X86__PARAVIRT_H
+#define ASM_X86__PARAVIRT_H
 /* Various instructions on x86 need to be replaced for
  * para-virtualization: those hooks are defined here. */
 
@@ -137,6 +137,7 @@
 
 	/* MSR, PMC and TSR operations.
 	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
+	u64 (*read_msr_amd)(unsigned int msr, int *err);
 	u64 (*read_msr)(unsigned int msr, int *err);
 	int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
 
@@ -257,13 +258,13 @@
 	 * Hooks for allocating/releasing pagetable pages when they're
 	 * attached to a pagetable
 	 */
-	void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
-	void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
-	void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
-	void (*alloc_pud)(struct mm_struct *mm, u32 pfn);
-	void (*release_pte)(u32 pfn);
-	void (*release_pmd)(u32 pfn);
-	void (*release_pud)(u32 pfn);
+	void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
+	void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
+	void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
+	void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
+	void (*release_pte)(unsigned long pfn);
+	void (*release_pmd)(unsigned long pfn);
+	void (*release_pud)(unsigned long pfn);
 
 	/* Pagetable manipulation functions */
 	void (*set_pte)(pte_t *ptep, pte_t pteval);
@@ -726,6 +727,10 @@
 {
 	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
 }
+static inline u64 paravirt_read_msr_amd(unsigned msr, int *err)
+{
+	return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err);
+}
 static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
 {
 	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
@@ -771,6 +776,13 @@
 	*p = paravirt_read_msr(msr, &err);
 	return err;
 }
+static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+{
+	int err;
+
+	*p = paravirt_read_msr_amd(msr, &err);
+	return err;
+}
 
 static inline u64 paravirt_read_tsc(void)
 {
@@ -993,35 +1005,35 @@
 	PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
 }
 
-static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
+static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
 {
 	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
 }
-static inline void paravirt_release_pte(unsigned pfn)
+static inline void paravirt_release_pte(unsigned long pfn)
 {
 	PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
 }
 
-static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn)
+static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
 {
 	PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
 }
 
-static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn,
-					    unsigned start, unsigned count)
+static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
+					    unsigned long start, unsigned long count)
 {
 	PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
 }
-static inline void paravirt_release_pmd(unsigned pfn)
+static inline void paravirt_release_pmd(unsigned long pfn)
 {
 	PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
 }
 
-static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn)
+static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
 }
-static inline void paravirt_release_pud(unsigned pfn)
+static inline void paravirt_release_pud(unsigned long pfn)
 {
 	PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
 }
@@ -1634,4 +1646,4 @@
 
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PARAVIRT */
-#endif	/* __ASM_PARAVIRT_H */
+#endif /* ASM_X86__PARAVIRT_H */

diff --git a/include/asm-x86/parport.h b/include/asm-x86/parport.h
index 3c4ffeb..2e3dda4 100644
--- a/include/asm-x86/parport.h
+++ b/include/asm-x86/parport.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PARPORT_H
-#define _ASM_X86_PARPORT_H
+#ifndef ASM_X86__PARPORT_H
+#define ASM_X86__PARPORT_H
 
 static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
 static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
@@ -7,4 +7,4 @@
 	return parport_pc_find_isa_ports(autoirq, autodma);
 }
 
-#endif /* _ASM_X86_PARPORT_H */
+#endif /* ASM_X86__PARPORT_H */

diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h
index 7edc473..482c3e3 100644
--- a/include/asm-x86/pat.h
+++ b/include/asm-x86/pat.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_PAT_H
-#define _ASM_PAT_H
+#ifndef ASM_X86__PAT_H
+#define ASM_X86__PAT_H
 
 #include <linux/types.h>
 
@@ -19,4 +19,4 @@
 
 extern void pat_disable(char *reason);
 
-#endif
+#endif /* ASM_X86__PAT_H */

diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h
index 80c775d..da42be0 100644
--- a/include/asm-x86/pci-direct.h
+++ b/include/asm-x86/pci-direct.h

@@ -1,5 +1,5 @@
-#ifndef ASM_PCI_DIRECT_H
-#define ASM_PCI_DIRECT_H 1
+#ifndef ASM_X86__PCI_DIRECT_H
+#define ASM_X86__PCI_DIRECT_H
 
 #include <linux/types.h>
 
@@ -18,4 +18,4 @@
 extern unsigned int pci_early_dump_regs;
 extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
 extern void early_dump_pci_devices(void);
-#endif
+#endif /* ASM_X86__PCI_DIRECT_H */

diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 2db14cf..6025831 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h

@@ -1,5 +1,5 @@
-#ifndef __x86_PCI_H
-#define __x86_PCI_H
+#ifndef ASM_X86__PCI_H
+#define ASM_X86__PCI_H
 
 #include <linux/mm.h> /* for struct page */
 #include <linux/types.h>
@@ -111,4 +111,4 @@
 }
 #endif
 
-#endif
+#endif /* ASM_X86__PCI_H */

diff --git a/include/asm-x86/pci_32.h b/include/asm-x86/pci_32.h
index a50d468..3f22882 100644
--- a/include/asm-x86/pci_32.h
+++ b/include/asm-x86/pci_32.h

@@ -1,5 +1,5 @@
-#ifndef __i386_PCI_H
-#define __i386_PCI_H
+#ifndef ASM_X86__PCI_32_H
+#define ASM_X86__PCI_32_H
 
 
 #ifdef __KERNEL__
@@ -31,4 +31,4 @@
 #endif /* __KERNEL__ */
 
 
-#endif /* __i386_PCI_H */
+#endif /* ASM_X86__PCI_32_H */

diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h
index f330234..f72e12d 100644
--- a/include/asm-x86/pci_64.h
+++ b/include/asm-x86/pci_64.h

@@ -1,5 +1,5 @@
-#ifndef __x8664_PCI_H
-#define __x8664_PCI_H
+#ifndef ASM_X86__PCI_64_H
+#define ASM_X86__PCI_64_H
 
 #ifdef __KERNEL__
 
@@ -63,4 +63,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* __x8664_PCI_H */
+#endif /* ASM_X86__PCI_64_H */

diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index b34e9a7..80860af 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h

@@ -1,5 +1,5 @@
-#ifndef X86_64_PDA_H
-#define X86_64_PDA_H
+#ifndef ASM_X86__PDA_H
+#define ASM_X86__PDA_H
 
 #ifndef __ASSEMBLY__
 #include <linux/stddef.h>
@@ -134,4 +134,4 @@
 
 #define PDA_STACKOFFSET (5*8)
 
-#endif
+#endif /* ASM_X86__PDA_H */

diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index f643a3a..e10a1d0 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PERCPU_H_
-#define _ASM_X86_PERCPU_H_
+#ifndef ASM_X86__PERCPU_H
+#define ASM_X86__PERCPU_H
 
 #ifdef CONFIG_X86_64
 #include <linux/compiler.h>
@@ -215,4 +215,4 @@
 
 #endif	/* !CONFIG_SMP */
 
-#endif /* _ASM_X86_PERCPU_H_ */
+#endif /* ASM_X86__PERCPU_H */

diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h
index d63ea43..3cd23ad 100644
--- a/include/asm-x86/pgalloc.h
+++ b/include/asm-x86/pgalloc.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PGALLOC_H
-#define _ASM_X86_PGALLOC_H
+#ifndef ASM_X86__PGALLOC_H
+#define ASM_X86__PGALLOC_H
 
 #include <linux/threads.h>
 #include <linux/mm.h>		/* for struct page */
@@ -111,4 +111,4 @@
 #endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
 
-#endif	/* _ASM_X86_PGALLOC_H */
+#endif /* ASM_X86__PGALLOC_H */

diff --git a/include/asm-x86/pgtable-2level-defs.h b/include/asm-x86/pgtable-2level-defs.h
index 0f71c9f..7ec48f4 100644
--- a/include/asm-x86/pgtable-2level-defs.h
+++ b/include/asm-x86/pgtable-2level-defs.h

@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_2LEVEL_DEFS_H
-#define _I386_PGTABLE_2LEVEL_DEFS_H
+#ifndef ASM_X86__PGTABLE_2LEVEL_DEFS_H
+#define ASM_X86__PGTABLE_2LEVEL_DEFS_H
 
 #define SHARED_KERNEL_PMD	0
 
@@ -17,4 +17,4 @@
 
 #define PTRS_PER_PTE	1024
 
-#endif /* _I386_PGTABLE_2LEVEL_DEFS_H */
+#endif /* ASM_X86__PGTABLE_2LEVEL_DEFS_H */

diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h
index 46bc52c..8176208 100644
--- a/include/asm-x86/pgtable-2level.h
+++ b/include/asm-x86/pgtable-2level.h

@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_2LEVEL_H
-#define _I386_PGTABLE_2LEVEL_H
+#ifndef ASM_X86__PGTABLE_2LEVEL_H
+#define ASM_X86__PGTABLE_2LEVEL_H
 
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
@@ -53,9 +53,7 @@
 #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
 #endif
 
-#define pte_page(x)		pfn_to_page(pte_pfn(x))
 #define pte_none(x)		(!(x).pte_low)
-#define pte_pfn(x)		(pte_val(x) >> PAGE_SHIFT)
 
 /*
  * Bits 0, 6 and 7 are taken, split up the 29 bits of offset
@@ -78,4 +76,4 @@
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
-#endif /* _I386_PGTABLE_2LEVEL_H */
+#endif /* ASM_X86__PGTABLE_2LEVEL_H */

diff --git a/include/asm-x86/pgtable-3level-defs.h b/include/asm-x86/pgtable-3level-defs.h
index 448ac95..c05fe6f 100644
--- a/include/asm-x86/pgtable-3level-defs.h
+++ b/include/asm-x86/pgtable-3level-defs.h

@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_3LEVEL_DEFS_H
-#define _I386_PGTABLE_3LEVEL_DEFS_H
+#ifndef ASM_X86__PGTABLE_3LEVEL_DEFS_H
+#define ASM_X86__PGTABLE_3LEVEL_DEFS_H
 
 #ifdef CONFIG_PARAVIRT
 #define SHARED_KERNEL_PMD	(pv_info.shared_kernel_pmd)
@@ -25,4 +25,4 @@
  */
 #define PTRS_PER_PTE	512
 
-#endif /* _I386_PGTABLE_3LEVEL_DEFS_H */
+#endif /* ASM_X86__PGTABLE_3LEVEL_DEFS_H */

diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h
index 105057f..75f4276 100644
--- a/include/asm-x86/pgtable-3level.h
+++ b/include/asm-x86/pgtable-3level.h

@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_3LEVEL_H
-#define _I386_PGTABLE_3LEVEL_H
+#ifndef ASM_X86__PGTABLE_3LEVEL_H
+#define ASM_X86__PGTABLE_3LEVEL_H
 
 /*
  * Intel Physical Address Extension (PAE) Mode - three-level page
@@ -151,18 +151,11 @@
 	return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
 }
 
-#define pte_page(x)	pfn_to_page(pte_pfn(x))
-
 static inline int pte_none(pte_t pte)
 {
 	return !pte.pte_low && !pte.pte_high;
 }
 
-static inline unsigned long pte_pfn(pte_t pte)
-{
-	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
-}
-
 /*
  * Bits 0, 6 and 7 are taken in the low part of the pte,
  * put the 32 bits of offset into the high part.
@@ -179,4 +172,4 @@
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
-#endif /* _I386_PGTABLE_3LEVEL_H */
+#endif /* ASM_X86__PGTABLE_3LEVEL_H */

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 04caa2f..888add7 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PGTABLE_H
-#define _ASM_X86_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_H
+#define ASM_X86__PGTABLE_H
 
 #define FIRST_USER_ADDRESS	0
 
@@ -186,6 +186,13 @@
 	return pte_val(pte) & _PAGE_SPECIAL;
 }
 
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
+#define pte_page(pte)	pfn_to_page(pte_pfn(pte))
+
 static inline int pmd_large(pmd_t pte)
 {
 	return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -313,6 +320,8 @@
 static inline void native_pagetable_setup_done(pgd_t *base) {}
 #endif
 
+extern int arch_report_meminfo(char *page);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
@@ -521,4 +530,4 @@
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
-#endif	/* _ASM_X86_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_H */

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 5c3b265..8de702d 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h

@@ -1,5 +1,5 @@
-#ifndef _I386_PGTABLE_H
-#define _I386_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_32_H
+#define ASM_X86__PGTABLE_32_H
 
 
 /*
@@ -31,6 +31,7 @@
 static inline void check_pgt_cache(void) { }
 void paging_init(void);
 
+extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
 
 /*
  * The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -56,8 +57,7 @@
  * area for the same reason. ;)
  */
 #define VMALLOC_OFFSET	(8 * 1024 * 1024)
-#define VMALLOC_START	(((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \
-			 & ~(VMALLOC_OFFSET - 1))
+#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
 #ifdef CONFIG_X86_PAE
 #define LAST_PKMAP 512
 #else
@@ -73,6 +73,8 @@
 # define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
 #endif
 
+#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
+
 /*
  * Define this if things work differently on an i386 and an i486:
  * it will (on an i486) warn about kernel memory accesses that are
@@ -186,4 +188,4 @@
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)	\
 	remap_pfn_range(vma, vaddr, pfn, size, prot)
 
-#endif /* _I386_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_32_H */

diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 549144d..fde9770 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h

@@ -1,5 +1,5 @@
-#ifndef _X86_64_PGTABLE_H
-#define _X86_64_PGTABLE_H
+#ifndef ASM_X86__PGTABLE_64_H
+#define ASM_X86__PGTABLE_64_H
 
 #include <linux/const.h>
 #ifndef __ASSEMBLY__
@@ -175,8 +175,6 @@
 #define pte_present(x)	(pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE))
 
 #define pages_to_mb(x)	((x) >> (20 - PAGE_SHIFT))   /* FIXME: is this right? */
-#define pte_page(x)	pfn_to_page(pte_pfn((x)))
-#define pte_pfn(x)	((pte_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
 /*
  * Macro to mark a page protection value as "uncacheable".
@@ -284,4 +282,4 @@
 #define __HAVE_ARCH_PTE_SAME
 #endif /* !__ASSEMBLY__ */
 
-#endif /* _X86_64_PGTABLE_H */
+#endif /* ASM_X86__PGTABLE_64_H */

diff --git a/include/asm-x86/posix_types_32.h b/include/asm-x86/posix_types_32.h
index b031efd..70cf2bb 100644
--- a/include/asm-x86/posix_types_32.h
+++ b/include/asm-x86/posix_types_32.h

@@ -1,5 +1,5 @@
-#ifndef __ARCH_I386_POSIX_TYPES_H
-#define __ARCH_I386_POSIX_TYPES_H
+#ifndef ASM_X86__POSIX_TYPES_32_H
+#define ASM_X86__POSIX_TYPES_32_H
 
 /*
  * This file is generally used by user-level software, so you need to
@@ -82,4 +82,4 @@
 
 #endif /* defined(__KERNEL__) */
 
-#endif
+#endif /* ASM_X86__POSIX_TYPES_32_H */

diff --git a/include/asm-x86/posix_types_64.h b/include/asm-x86/posix_types_64.h
index d6624c9..388b4e7 100644
--- a/include/asm-x86/posix_types_64.h
+++ b/include/asm-x86/posix_types_64.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_POSIX_TYPES_H
-#define _ASM_X86_64_POSIX_TYPES_H
+#ifndef ASM_X86__POSIX_TYPES_64_H
+#define ASM_X86__POSIX_TYPES_64_H
 
 /*
  * This file is generally used by user-level software, so you need to
@@ -116,4 +116,4 @@
 
 #endif /* defined(__KERNEL__) */
 
-#endif
+#endif /* ASM_X86__POSIX_TYPES_64_H */

diff --git a/include/asm-x86/prctl.h b/include/asm-x86/prctl.h
index 52952ad..e7ae34e 100644
--- a/include/asm-x86/prctl.h
+++ b/include/asm-x86/prctl.h

@@ -1,5 +1,5 @@
-#ifndef X86_64_PRCTL_H
-#define X86_64_PRCTL_H 1
+#ifndef ASM_X86__PRCTL_H
+#define ASM_X86__PRCTL_H
 
 #define ARCH_SET_GS 0x1001
 #define ARCH_SET_FS 0x1002
@@ -7,4 +7,4 @@
 #define ARCH_GET_GS 0x1004
 
 
-#endif
+#endif /* ASM_X86__PRCTL_H */

diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index eff2ecd..5dd7977 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_I386_PROCESSOR_FLAGS_H
-#define __ASM_I386_PROCESSOR_FLAGS_H
+#ifndef ASM_X86__PROCESSOR_FLAGS_H
+#define ASM_X86__PROCESSOR_FLAGS_H
 /* Various flags defined: can be included from assembler. */
 
 /*
@@ -96,4 +96,4 @@
 #endif
 #endif
 
-#endif	/* __ASM_I386_PROCESSOR_FLAGS_H */
+#endif /* ASM_X86__PROCESSOR_FLAGS_H */

diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 4df3e2f..5eaf9bf 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_PROCESSOR_H
-#define __ASM_X86_PROCESSOR_H
+#ifndef ASM_X86__PROCESSOR_H
+#define ASM_X86__PROCESSOR_H
 
 #include <asm/processor-flags.h>
 
@@ -20,6 +20,7 @@
 #include <asm/msr.h>
 #include <asm/desc_defs.h>
 #include <asm/nops.h>
+#include <asm/ds.h>
 
 #include <linux/personality.h>
 #include <linux/cpumask.h>
@@ -140,6 +141,8 @@
 #define current_cpu_data	boot_cpu_data
 #endif
 
+extern const struct seq_operations cpuinfo_op;
+
 static inline int hlt_works(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -153,6 +156,8 @@
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
+extern struct pt_regs *idle_regs(struct pt_regs *);
+
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
@@ -411,9 +416,14 @@
 	unsigned		io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
-/* Debug Store - if not 0 points to a DS Save Area configuration;
- *               goes into MSR_IA32_DS_AREA */
-	unsigned long	ds_area_msr;
+#ifdef CONFIG_X86_DS
+/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+	struct ds_context	*ds_ctx;
+#endif /* CONFIG_X86_DS */
+#ifdef CONFIG_X86_PTRACE_BTS
+/* the signal to send on a bts buffer overflow */
+	unsigned int	bts_ovfl_signal;
+#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -943,4 +953,4 @@
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
-#endif
+#endif /* ASM_X86__PROCESSOR_H */

diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 3dd458c..6e89e8b 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X8664_PROTO_H
-#define _ASM_X8664_PROTO_H 1
+#ifndef ASM_X86__PROTO_H
+#define ASM_X86__PROTO_H
 
 #include <asm/ldt.h>
 
@@ -29,4 +29,4 @@
 #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))
 #define round_down(x, y) ((x) & ~((y) - 1))
 
-#endif
+#endif /* ASM_X86__PROTO_H */

diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 72e7b9d..4298b88 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PTRACE_ABI_H
-#define _ASM_X86_PTRACE_ABI_H
+#ifndef ASM_X86__PTRACE_ABI_H
+#define ASM_X86__PTRACE_ABI_H
 
 #ifdef __i386__
 
@@ -80,8 +80,9 @@
 
 #define PTRACE_SINGLEBLOCK	33	/* resume execution until next branch */
 
-#ifndef __ASSEMBLY__
+#ifdef CONFIG_X86_PTRACE_BTS
 
+#ifndef __ASSEMBLY__
 #include <asm/types.h>
 
 /* configuration/status structure used in PTRACE_BTS_CONFIG and
@@ -97,20 +98,20 @@
 	/* actual size of bts_struct in bytes */
 	__u32 bts_size;
 };
-#endif
+#endif /* __ASSEMBLY__ */
 
 #define PTRACE_BTS_O_TRACE	0x1 /* branch trace */
 #define PTRACE_BTS_O_SCHED	0x2 /* scheduling events w/ jiffies */
 #define PTRACE_BTS_O_SIGNAL     0x4 /* send SIG<signal> on buffer overflow
 				       instead of wrapping around */
-#define PTRACE_BTS_O_CUT_SIZE	0x8 /* cut requested size to max available
-				       instead of failing */
+#define PTRACE_BTS_O_ALLOC	0x8 /* (re)allocate buffer */
 
 #define PTRACE_BTS_CONFIG	40
 /* Configure branch trace recording.
    ADDR points to a struct ptrace_bts_config.
    DATA gives the size of that buffer.
-   A new buffer is allocated, iff the size changes.
+   A new buffer is allocated, if requested in the flags.
+   An overflow signal may only be requested for new buffers.
    Returns the number of bytes read.
 */
 #define PTRACE_BTS_STATUS	41
@@ -119,7 +120,7 @@
    Returns the number of bytes written.
 */
 #define PTRACE_BTS_SIZE		42
-/* Return the number of available BTS records.
+/* Return the number of available BTS records for draining.
    DATA and ADDR are ignored.
 */
 #define PTRACE_BTS_GET		43
@@ -139,5 +140,6 @@
    BTS records are read from oldest to newest.
    Returns number of BTS records drained.
 */
+#endif /* CONFIG_X86_PTRACE_BTS */
 
-#endif
+#endif /* ASM_X86__PTRACE_ABI_H */

diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 8a71db8..d64a610 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PTRACE_H
-#define _ASM_X86_PTRACE_H
+#ifndef ASM_X86__PTRACE_H
+#define ASM_X86__PTRACE_H
 
 #include <linux/compiler.h>	/* For __user */
 #include <asm/ptrace-abi.h>
@@ -127,14 +127,48 @@
 #endif /* __KERNEL__ */
 #endif /* !__i386__ */
 
+
+#ifdef CONFIG_X86_PTRACE_BTS
+/* a branch trace record entry
+ *
+ * In order to unify the interface between various processor versions,
+ * we use the below data structure for all processors.
+ */
+enum bts_qualifier {
+	BTS_INVALID = 0,
+	BTS_BRANCH,
+	BTS_TASK_ARRIVES,
+	BTS_TASK_DEPARTS
+};
+
+struct bts_struct {
+	__u64 qualifier;
+	union {
+		/* BTS_BRANCH */
+		struct {
+			__u64 from_ip;
+			__u64 to_ip;
+		} lbr;
+		/* BTS_TASK_ARRIVES or
+		   BTS_TASK_DEPARTS */
+		__u64 jiffies;
+	} variant;
+};
+#endif /* CONFIG_X86_PTRACE_BTS */
+
 #ifdef __KERNEL__
 
-/* the DS BTS struct is used for ptrace as well */
-#include <asm/ds.h>
+#include <linux/init.h>
 
+struct cpuinfo_x86;
 struct task_struct;
 
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
 extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
+#else
+#define ptrace_bts_init_intel(config) do {} while (0)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 extern unsigned long profile_pc(struct pt_regs *regs);
 
@@ -148,6 +182,9 @@
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 #endif
 
+extern long syscall_trace_enter(struct pt_regs *);
+extern void syscall_trace_leave(struct pt_regs *);
+
 static inline unsigned long regs_return_value(struct pt_regs *regs)
 {
 	return regs->ax;
@@ -213,6 +250,11 @@
 	return regs->bp;
 }
 
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+	return regs->sp;
+}
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
@@ -239,4 +281,4 @@
 
 #endif /* !__ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__PTRACE_H */

diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h
index 6857f84..edb3b4e 100644
--- a/include/asm-x86/pvclock-abi.h
+++ b/include/asm-x86/pvclock-abi.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PVCLOCK_ABI_H_
-#define _ASM_X86_PVCLOCK_ABI_H_
+#ifndef ASM_X86__PVCLOCK_ABI_H
+#define ASM_X86__PVCLOCK_ABI_H
 #ifndef __ASSEMBLY__
 
 /*
@@ -39,4 +39,4 @@
 } __attribute__((__packed__));
 
 #endif /* __ASSEMBLY__ */
-#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
+#endif /* ASM_X86__PVCLOCK_ABI_H */

diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
index 85b1bba..1a38f68 100644
--- a/include/asm-x86/pvclock.h
+++ b/include/asm-x86/pvclock.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_PVCLOCK_H_
-#define _ASM_X86_PVCLOCK_H_
+#ifndef ASM_X86__PVCLOCK_H
+#define ASM_X86__PVCLOCK_H
 
 #include <linux/clocksource.h>
 #include <asm/pvclock-abi.h>
@@ -10,4 +10,4 @@
 			    struct pvclock_vcpu_time_info *vcpu,
 			    struct timespec *ts);
 
-#endif /* _ASM_X86_PVCLOCK_H_ */
+#endif /* ASM_X86__PVCLOCK_H */

diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
index 206f355..1c2f0ce 100644
--- a/include/asm-x86/reboot.h
+++ b/include/asm-x86/reboot.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_REBOOT_H
-#define _ASM_REBOOT_H
+#ifndef ASM_X86__REBOOT_H
+#define ASM_X86__REBOOT_H
 
 struct pt_regs;
 
@@ -18,4 +18,4 @@
 void native_machine_shutdown(void);
 void machine_real_restart(const unsigned char *code, int length);
 
-#endif	/* _ASM_REBOOT_H */
+#endif /* ASM_X86__REBOOT_H */

diff --git a/include/asm-x86/reboot_fixups.h b/include/asm-x86/reboot_fixups.h
index 0cb7d87..2c2987d 100644
--- a/include/asm-x86/reboot_fixups.h
+++ b/include/asm-x86/reboot_fixups.h

@@ -1,6 +1,6 @@
-#ifndef _LINUX_REBOOT_FIXUPS_H
-#define _LINUX_REBOOT_FIXUPS_H
+#ifndef ASM_X86__REBOOT_FIXUPS_H
+#define ASM_X86__REBOOT_FIXUPS_H
 
 extern void mach_reboot_fixups(void);
 
-#endif /* _LINUX_REBOOT_FIXUPS_H */
+#endif /* ASM_X86__REBOOT_FIXUPS_H */

diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h
index 5c2ff4b..a01c4e3 100644
--- a/include/asm-x86/required-features.h
+++ b/include/asm-x86/required-features.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_REQUIRED_FEATURES_H
-#define _ASM_REQUIRED_FEATURES_H 1
+#ifndef ASM_X86__REQUIRED_FEATURES_H
+#define ASM_X86__REQUIRED_FEATURES_H
 
 /* Define minimum CPUID feature set for kernel These bits are checked
    really early to actually display a visible error message before the
@@ -79,4 +79,4 @@
 #define REQUIRED_MASK6	0
 #define REQUIRED_MASK7	0
 
-#endif
+#endif /* ASM_X86__REQUIRED_FEATURES_H */

diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h
index 8d9f0b4..e39376d 100644
--- a/include/asm-x86/resume-trace.h
+++ b/include/asm-x86/resume-trace.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_RESUME_TRACE_H
-#define _ASM_X86_RESUME_TRACE_H
+#ifndef ASM_X86__RESUME_TRACE_H
+#define ASM_X86__RESUME_TRACE_H
 
 #include <asm/asm.h>
 
@@ -7,7 +7,7 @@
 do {								\
 	if (pm_trace_enabled) {					\
 		const void *tracedata;				\
-		asm volatile(_ASM_MOV_UL " $1f,%0\n"		\
+		asm volatile(_ASM_MOV " $1f,%0\n"		\
 			     ".section .tracedata,\"a\"\n"	\
 			     "1:\t.word %c1\n\t"		\
 			     _ASM_PTR " %c2\n"			\
@@ -18,4 +18,4 @@
 	}							\
 } while (0)
 
-#endif
+#endif /* ASM_X86__RESUME_TRACE_H */

diff --git a/include/asm-x86/rio.h b/include/asm-x86/rio.h
index c9448bd..5e1256b 100644
--- a/include/asm-x86/rio.h
+++ b/include/asm-x86/rio.h

@@ -5,8 +5,8 @@
  * Author: Laurent Vivier <Laurent.Vivier@bull.net>
  */
 
-#ifndef __ASM_RIO_H
-#define __ASM_RIO_H
+#ifndef ASM_X86__RIO_H
+#define ASM_X86__RIO_H
 
 #define RIO_TABLE_VERSION	3
 
@@ -60,4 +60,4 @@
 	ALT_CALGARY	= 5,	/* Second Planar Calgary      */
 };
 
-#endif /* __ASM_RIO_H */
+#endif /* ASM_X86__RIO_H */

diff --git a/include/asm-x86/rwlock.h b/include/asm-x86/rwlock.h
index 6a8c0d6..48a3109 100644
--- a/include/asm-x86/rwlock.h
+++ b/include/asm-x86/rwlock.h

@@ -1,8 +1,8 @@
-#ifndef _ASM_X86_RWLOCK_H
-#define _ASM_X86_RWLOCK_H
+#ifndef ASM_X86__RWLOCK_H
+#define ASM_X86__RWLOCK_H
 
 #define RW_LOCK_BIAS		 0x01000000
 
 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */
 
-#endif /* _ASM_X86_RWLOCK_H */
+#endif /* ASM_X86__RWLOCK_H */

diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h
index 750f2a3..3ff3015 100644
--- a/include/asm-x86/rwsem.h
+++ b/include/asm-x86/rwsem.h

@@ -29,8 +29,8 @@
  * front, then they'll all be woken up, but no other readers will be.
  */
 
-#ifndef _I386_RWSEM_H
-#define _I386_RWSEM_H
+#ifndef ASM_X86__RWSEM_H
+#define ASM_X86__RWSEM_H
 
 #ifndef _LINUX_RWSEM_H
 #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
@@ -262,4 +262,4 @@
 }
 
 #endif /* __KERNEL__ */
-#endif /* _I386_RWSEM_H */
+#endif /* ASM_X86__RWSEM_H */

diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h
index c043206..ee48f88 100644
--- a/include/asm-x86/scatterlist.h
+++ b/include/asm-x86/scatterlist.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SCATTERLIST_H
-#define _ASM_X86_SCATTERLIST_H
+#ifndef ASM_X86__SCATTERLIST_H
+#define ASM_X86__SCATTERLIST_H
 
 #include <asm/types.h>
 
@@ -30,4 +30,4 @@
 # define sg_dma_len(sg)		((sg)->dma_length)
 #endif
 
-#endif
+#endif /* ASM_X86__SCATTERLIST_H */

diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h
index 36e71c5..cf9ab2d 100644
--- a/include/asm-x86/seccomp_32.h
+++ b/include/asm-x86/seccomp_32.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_SECCOMP_H
-#define _ASM_SECCOMP_H
+#ifndef ASM_X86__SECCOMP_32_H
+#define ASM_X86__SECCOMP_32_H
 
 #include <linux/thread_info.h>
 
@@ -14,4 +14,4 @@
 #define __NR_seccomp_exit __NR_exit
 #define __NR_seccomp_sigreturn __NR_sigreturn
 
-#endif /* _ASM_SECCOMP_H */
+#endif /* ASM_X86__SECCOMP_32_H */

diff --git a/include/asm-x86/seccomp_64.h b/include/asm-x86/seccomp_64.h
index 76cfe69..03274ce 100644
--- a/include/asm-x86/seccomp_64.h
+++ b/include/asm-x86/seccomp_64.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_SECCOMP_H
-#define _ASM_SECCOMP_H
+#ifndef ASM_X86__SECCOMP_64_H
+#define ASM_X86__SECCOMP_64_H
 
 #include <linux/thread_info.h>
 
@@ -22,4 +22,4 @@
 #define __NR_seccomp_exit_32 __NR_ia32_exit
 #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn
 
-#endif /* _ASM_SECCOMP_H */
+#endif /* ASM_X86__SECCOMP_64_H */

diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index 646452e..ea5f0a8 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SEGMENT_H_
-#define _ASM_X86_SEGMENT_H_
+#ifndef ASM_X86__SEGMENT_H
+#define ASM_X86__SEGMENT_H
 
 /* Constructor for a conventional segment GDT (or LDT) entry */
 /* This is a macro so it can be used in initializers */
@@ -212,4 +212,4 @@
 #endif
 #endif
 
-#endif
+#endif /* ASM_X86__SEGMENT_H */

diff --git a/include/asm-x86/sembuf.h b/include/asm-x86/sembuf.h
index ee50c80..81f06b7 100644
--- a/include/asm-x86/sembuf.h
+++ b/include/asm-x86/sembuf.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SEMBUF_H
-#define _ASM_X86_SEMBUF_H
+#ifndef ASM_X86__SEMBUF_H
+#define ASM_X86__SEMBUF_H
 
 /*
  * The semid64_ds structure for x86 architecture.
@@ -21,4 +21,4 @@
 	unsigned long	__unused4;
 };
 
-#endif /* _ASM_X86_SEMBUF_H */
+#endif /* ASM_X86__SEMBUF_H */

diff --git a/include/asm-x86/serial.h b/include/asm-x86/serial.h
index 628c801..303660b 100644
--- a/include/asm-x86/serial.h
+++ b/include/asm-x86/serial.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SERIAL_H
-#define _ASM_X86_SERIAL_H
+#ifndef ASM_X86__SERIAL_H
+#define ASM_X86__SERIAL_H
 
 /*
  * This assumes you have a 1.8432 MHz clock for your UART.
@@ -26,4 +26,4 @@
 	{ 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS },	/* ttyS2 */	\
 	{ 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS },	/* ttyS3 */
 
-#endif /* _ASM_X86_SERIAL_H */
+#endif /* ASM_X86__SERIAL_H */

diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index a07c6f1..9030cb7 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SETUP_H
-#define _ASM_X86_SETUP_H
+#ifndef ASM_X86__SETUP_H
+#define ASM_X86__SETUP_H
 
 #define COMMAND_LINE_SIZE 2048
 
@@ -41,6 +41,7 @@
 };
 
 extern struct x86_quirks *x86_quirks;
+extern unsigned long saved_video_mode;
 
 #ifndef CONFIG_PARAVIRT
 #define paravirt_post_allocator_init()	do {} while (0)
@@ -100,4 +101,4 @@
 #endif /* __ASSEMBLY__ */
 #endif  /*  __KERNEL__  */
 
-#endif /* _ASM_X86_SETUP_H */
+#endif /* ASM_X86__SETUP_H */

diff --git a/include/asm-x86/shmbuf.h b/include/asm-x86/shmbuf.h
index b51413b..f51aec2 100644
--- a/include/asm-x86/shmbuf.h
+++ b/include/asm-x86/shmbuf.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SHMBUF_H
-#define _ASM_X86_SHMBUF_H
+#ifndef ASM_X86__SHMBUF_H
+#define ASM_X86__SHMBUF_H
 
 /*
  * The shmid64_ds structure for x86 architecture.
@@ -48,4 +48,4 @@
 	unsigned long	__unused4;
 };
 
-#endif /* _ASM_X86_SHMBUF_H */
+#endif /* ASM_X86__SHMBUF_H */

diff --git a/include/asm-x86/shmparam.h b/include/asm-x86/shmparam.h
index 0880cf0..a83a1fd 100644
--- a/include/asm-x86/shmparam.h
+++ b/include/asm-x86/shmparam.h

@@ -1,6 +1,6 @@
-#ifndef _ASM_X86_SHMPARAM_H
-#define _ASM_X86_SHMPARAM_H
+#ifndef ASM_X86__SHMPARAM_H
+#define ASM_X86__SHMPARAM_H
 
 #define SHMLBA PAGE_SIZE	 /* attach addr a multiple of this */
 
-#endif /* _ASM_X86_SHMPARAM_H */
+#endif /* ASM_X86__SHMPARAM_H */

diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index 2f9c884..24879c8 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SIGCONTEXT_H
-#define _ASM_X86_SIGCONTEXT_H
+#ifndef ASM_X86__SIGCONTEXT_H
+#define ASM_X86__SIGCONTEXT_H
 
 #include <linux/compiler.h>
 #include <asm/types.h>
@@ -202,4 +202,4 @@
 
 #endif /* !__i386__ */
 
-#endif
+#endif /* ASM_X86__SIGCONTEXT_H */

diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h
index 57a9686..4e2ec73 100644
--- a/include/asm-x86/sigcontext32.h
+++ b/include/asm-x86/sigcontext32.h

@@ -1,5 +1,5 @@
-#ifndef _SIGCONTEXT32_H
-#define _SIGCONTEXT32_H 1
+#ifndef ASM_X86__SIGCONTEXT32_H
+#define ASM_X86__SIGCONTEXT32_H
 
 /* signal context for 32bit programs. */
 
@@ -68,4 +68,4 @@
        unsigned int cr2;
 };
 
-#endif
+#endif /* ASM_X86__SIGCONTEXT32_H */

diff --git a/include/asm-x86/siginfo.h b/include/asm-x86/siginfo.h
index a477bea..808bdfb 100644
--- a/include/asm-x86/siginfo.h
+++ b/include/asm-x86/siginfo.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SIGINFO_H
-#define _ASM_X86_SIGINFO_H
+#ifndef ASM_X86__SIGINFO_H
+#define ASM_X86__SIGINFO_H
 
 #ifdef __x86_64__
 # define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
@@ -7,4 +7,4 @@
 
 #include <asm-generic/siginfo.h>
 
-#endif
+#endif /* ASM_X86__SIGINFO_H */

diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h
index 6dac493..65acc82 100644
--- a/include/asm-x86/signal.h
+++ b/include/asm-x86/signal.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SIGNAL_H
-#define _ASM_X86_SIGNAL_H
+#ifndef ASM_X86__SIGNAL_H
+#define ASM_X86__SIGNAL_H
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
@@ -140,6 +140,9 @@
 struct k_sigaction {
 	struct sigaction sa;
 };
+
+extern void do_notify_resume(struct pt_regs *, void *, __u32);
+
 # else /* __KERNEL__ */
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
@@ -256,4 +259,4 @@
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
-#endif
+#endif /* ASM_X86__SIGNAL_H */

diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 3c877f7..04f84f4 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SMP_H_
-#define _ASM_X86_SMP_H_
+#ifndef ASM_X86__SMP_H
+#define ASM_X86__SMP_H
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
 #include <linux/init.h>
@@ -34,6 +34,9 @@
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(int, cpu_number);
+#endif
 
 DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
@@ -142,7 +145,6 @@
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-DECLARE_PER_CPU(int, cpu_number);
 #define raw_smp_processor_id() (x86_read_percpu(cpu_number))
 extern int safe_smp_processor_id(void);
 
@@ -205,4 +207,4 @@
 #endif
 
 #endif /* __ASSEMBLY__ */
-#endif
+#endif /* ASM_X86__SMP_H */

diff --git a/include/asm-x86/socket.h b/include/asm-x86/socket.h
index 80af9c4..db73274 100644
--- a/include/asm-x86/socket.h
+++ b/include/asm-x86/socket.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_SOCKET_H
-#define _ASM_SOCKET_H
+#ifndef ASM_X86__SOCKET_H
+#define ASM_X86__SOCKET_H
 
 #include <asm/sockios.h>
 
@@ -54,4 +54,4 @@
 
 #define SO_MARK			36
 
-#endif /* _ASM_SOCKET_H */
+#endif /* ASM_X86__SOCKET_H */

diff --git a/include/asm-x86/sockios.h b/include/asm-x86/sockios.h
index 49cc72b..a006704 100644
--- a/include/asm-x86/sockios.h
+++ b/include/asm-x86/sockios.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SOCKIOS_H
-#define _ASM_X86_SOCKIOS_H
+#ifndef ASM_X86__SOCKIOS_H
+#define ASM_X86__SOCKIOS_H
 
 /* Socket-level I/O control calls. */
 #define FIOSETOWN	0x8901
@@ -10,4 +10,4 @@
 #define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
 #define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
-#endif /* _ASM_X86_SOCKIOS_H */
+#endif /* ASM_X86__SOCKIOS_H */

diff --git a/include/asm-x86/sparsemem.h b/include/asm-x86/sparsemem.h
index 9bd48b0..38f8e6b 100644
--- a/include/asm-x86/sparsemem.h
+++ b/include/asm-x86/sparsemem.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SPARSEMEM_H
-#define _ASM_X86_SPARSEMEM_H
+#ifndef ASM_X86__SPARSEMEM_H
+#define ASM_X86__SPARSEMEM_H
 
 #ifdef CONFIG_SPARSEMEM
 /*
@@ -31,4 +31,4 @@
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
-#endif
+#endif /* ASM_X86__SPARSEMEM_H */

diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h
index e39c790..93adae3 100644
--- a/include/asm-x86/spinlock.h
+++ b/include/asm-x86/spinlock.h

@@ -1,5 +1,5 @@
-#ifndef _X86_SPINLOCK_H_
-#define _X86_SPINLOCK_H_
+#ifndef ASM_X86__SPINLOCK_H
+#define ASM_X86__SPINLOCK_H
 
 #include <asm/atomic.h>
 #include <asm/rwlock.h>
@@ -97,7 +97,7 @@
 		     "jne 1f\n\t"
 		     "movw %w0,%w1\n\t"
 		     "incb %h1\n\t"
-		     "lock ; cmpxchgw %w1,%2\n\t"
+		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
@@ -135,7 +135,7 @@
 	int inc = 0x00010000;
 	int tmp;
 
-	asm volatile("lock ; xaddl %0, %1\n"
+	asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
 		     "movzwl %w0, %2\n\t"
 		     "shrl $16, %0\n\t"
 		     "1:\t"
@@ -162,7 +162,7 @@
 		     "cmpl %0,%1\n\t"
 		     "jne 1f\n\t"
 		     "addl $0x00010000, %1\n\t"
-		     "lock ; cmpxchgl %1,%2\n\t"
+		     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
@@ -366,4 +366,4 @@
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
-#endif
+#endif /* ASM_X86__SPINLOCK_H */

diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h
index 06c071c..6aa9b56 100644
--- a/include/asm-x86/spinlock_types.h
+++ b/include/asm-x86/spinlock_types.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_SPINLOCK_TYPES_H
-#define __ASM_SPINLOCK_TYPES_H
+#ifndef ASM_X86__SPINLOCK_TYPES_H
+#define ASM_X86__SPINLOCK_TYPES_H
 
 #ifndef __LINUX_SPINLOCK_TYPES_H
 # error "please don't include this file directly"
@@ -17,4 +17,4 @@
 
 #define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
-#endif
+#endif /* ASM_X86__SPINLOCK_TYPES_H */

diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h
index 774c919..5363e4f 100644
--- a/include/asm-x86/srat.h
+++ b/include/asm-x86/srat.h

@@ -24,8 +24,8 @@
  * Send feedback to Pat Gaughen <gone@us.ibm.com>
  */
 
-#ifndef _ASM_SRAT_H_
-#define _ASM_SRAT_H_
+#ifndef ASM_X86__SRAT_H
+#define ASM_X86__SRAT_H
 
 #ifdef CONFIG_ACPI_NUMA
 extern int get_memcfg_from_srat(void);
@@ -36,4 +36,4 @@
 }
 #endif
 
-#endif /* _ASM_SRAT_H_ */
+#endif /* ASM_X86__SRAT_H */

diff --git a/include/asm-x86/stacktrace.h b/include/asm-x86/stacktrace.h
index 30f8252..f43517e 100644
--- a/include/asm-x86/stacktrace.h
+++ b/include/asm-x86/stacktrace.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_STACKTRACE_H
-#define _ASM_STACKTRACE_H 1
+#ifndef ASM_X86__STACKTRACE_H
+#define ASM_X86__STACKTRACE_H
 
 extern int kstack_depth_to_print;
 
@@ -18,4 +18,4 @@
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data);
 
-#endif
+#endif /* ASM_X86__STACKTRACE_H */

diff --git a/include/asm-x86/stat.h b/include/asm-x86/stat.h
index 5c22dcb..1e120f6 100644
--- a/include/asm-x86/stat.h
+++ b/include/asm-x86/stat.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_STAT_H
-#define _ASM_X86_STAT_H
+#ifndef ASM_X86__STAT_H
+#define ASM_X86__STAT_H
 
 #define STAT_HAVE_NSEC 1
 
@@ -111,4 +111,4 @@
 #endif
 };
 
-#endif
+#endif /* ASM_X86__STAT_H */

diff --git a/include/asm-x86/statfs.h b/include/asm-x86/statfs.h
index 7c651aa..3f005bc 100644
--- a/include/asm-x86/statfs.h
+++ b/include/asm-x86/statfs.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_STATFS_H
-#define _ASM_X86_STATFS_H
+#ifndef ASM_X86__STATFS_H
+#define ASM_X86__STATFS_H
 
 #ifdef __i386__
 #include <asm-generic/statfs.h>
@@ -60,4 +60,4 @@
 } __attribute__((packed));
 
 #endif /* !__i386__ */
-#endif
+#endif /* ASM_X86__STATFS_H */

diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h
index 193578c..487843e 100644
--- a/include/asm-x86/string_32.h
+++ b/include/asm-x86/string_32.h

@@ -1,5 +1,5 @@
-#ifndef _I386_STRING_H_
-#define _I386_STRING_H_
+#ifndef ASM_X86__STRING_32_H
+#define ASM_X86__STRING_32_H
 
 #ifdef __KERNEL__
 
@@ -323,4 +323,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__STRING_32_H */

diff --git a/include/asm-x86/string_64.h b/include/asm-x86/string_64.h
index 52b5ab3..a2add11d 100644
--- a/include/asm-x86/string_64.h
+++ b/include/asm-x86/string_64.h

@@ -1,5 +1,5 @@
-#ifndef _X86_64_STRING_H_
-#define _X86_64_STRING_H_
+#ifndef ASM_X86__STRING_64_H
+#define ASM_X86__STRING_64_H
 
 #ifdef __KERNEL__
 
@@ -57,4 +57,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__STRING_64_H */

diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h
index 8675c67..acb6d4d 100644
--- a/include/asm-x86/suspend_32.h
+++ b/include/asm-x86/suspend_32.h

@@ -3,8 +3,8 @@
  * Based on code
  * Copyright 2001 Patrick Mochel <mochel@osdl.org>
  */
-#ifndef __ASM_X86_32_SUSPEND_H
-#define __ASM_X86_32_SUSPEND_H
+#ifndef ASM_X86__SUSPEND_32_H
+#define ASM_X86__SUSPEND_32_H
 
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -48,4 +48,4 @@
 extern int acpi_save_state_mem(void);
 #endif
 
-#endif /* __ASM_X86_32_SUSPEND_H */
+#endif /* ASM_X86__SUSPEND_32_H */

diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h
index dc3262b..cf821dd 100644
--- a/include/asm-x86/suspend_64.h
+++ b/include/asm-x86/suspend_64.h

@@ -3,8 +3,8 @@
  * Based on code
  * Copyright 2001 Patrick Mochel <mochel@osdl.org>
  */
-#ifndef __ASM_X86_64_SUSPEND_H
-#define __ASM_X86_64_SUSPEND_H
+#ifndef ASM_X86__SUSPEND_64_H
+#define ASM_X86__SUSPEND_64_H
 
 #include <asm/desc.h>
 #include <asm/i387.h>
@@ -49,4 +49,4 @@
 extern char core_restore_code;
 extern char restore_registers;
 
-#endif /* __ASM_X86_64_SUSPEND_H */
+#endif /* ASM_X86__SUSPEND_64_H */

diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h
index 2730b35..1e20adb 100644
--- a/include/asm-x86/swiotlb.h
+++ b/include/asm-x86/swiotlb.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_SWIOTLB_H
-#define _ASM_SWIOTLB_H 1
+#ifndef ASM_X86__SWIOTLB_H
+#define ASM_X86__SWIOTLB_H
 
 #include <asm/dma-mapping.h>
 
@@ -55,4 +55,4 @@
 
 static inline void dma_mark_clean(void *addr, size_t size) {}
 
-#endif /* _ASM_SWIOTLB_H */
+#endif /* ASM_X86__SWIOTLB_H */

diff --git a/include/asm-x86/sync_bitops.h b/include/asm-x86/sync_bitops.h
index b47a1d0..b689bee 100644
--- a/include/asm-x86/sync_bitops.h
+++ b/include/asm-x86/sync_bitops.h

@@ -1,5 +1,5 @@
-#ifndef _I386_SYNC_BITOPS_H
-#define _I386_SYNC_BITOPS_H
+#ifndef ASM_X86__SYNC_BITOPS_H
+#define ASM_X86__SYNC_BITOPS_H
 
 /*
  * Copyright 1992, Linus Torvalds.
@@ -127,4 +127,4 @@
 
 #undef ADDR
 
-#endif /* _I386_SYNC_BITOPS_H */
+#endif /* ASM_X86__SYNC_BITOPS_H */

diff --git a/include/asm-x86/syscall.h b/include/asm-x86/syscall.h
new file mode 100644
index 0000000..04c47dc
--- /dev/null
+++ b/include/asm-x86/syscall.h

@@ -0,0 +1,211 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <linux/sched.h>
+#include <linux/err.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	/*
+	 * We always sign-extend a -1 value being set here,
+	 * so this is always either -1L or a syscall number.
+	 */
+	return regs->orig_ax;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->ax = regs->orig_ax;
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long error = regs->ax;
+#ifdef CONFIG_IA32_EMULATION
+	/*
+	 * TS_COMPAT is set for 32-bit syscall entries and then
+	 * remains set until we return to user mode.
+	 */
+	if (task_thread_info(task)->status & TS_COMPAT)
+		/*
+		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+		 * and will match correctly in comparisons.
+		 */
+		error = (long) (int) error;
+#endif
+	return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->ax;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	regs->ax = (long) error ?: val;
+}
+
+#ifdef CONFIG_X86_32
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(&regs->bx + i, args, n * sizeof(args[0]));
+}
+
+#else	 /* CONFIG_X86_64 */
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+# ifdef CONFIG_IA32_EMULATION
+	if (task_thread_info(task)->status & TS_COMPAT)
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			*args++ = regs->bp;
+		case 5:
+			if (!n--) break;
+			*args++ = regs->di;
+		case 4:
+			if (!n--) break;
+			*args++ = regs->si;
+		case 3:
+			if (!n--) break;
+			*args++ = regs->dx;
+		case 2:
+			if (!n--) break;
+			*args++ = regs->cx;
+		case 1:
+			if (!n--) break;
+			*args++ = regs->bx;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+			break;
+		}
+	else
+# endif
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			*args++ = regs->r9;
+		case 5:
+			if (!n--) break;
+			*args++ = regs->r8;
+		case 4:
+			if (!n--) break;
+			*args++ = regs->r10;
+		case 3:
+			if (!n--) break;
+			*args++ = regs->dx;
+		case 2:
+			if (!n--) break;
+			*args++ = regs->si;
+		case 1:
+			if (!n--) break;
+			*args++ = regs->di;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+			break;
+		}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+# ifdef CONFIG_IA32_EMULATION
+	if (task_thread_info(task)->status & TS_COMPAT)
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			regs->bp = *args++;
+		case 5:
+			if (!n--) break;
+			regs->di = *args++;
+		case 4:
+			if (!n--) break;
+			regs->si = *args++;
+		case 3:
+			if (!n--) break;
+			regs->dx = *args++;
+		case 2:
+			if (!n--) break;
+			regs->cx = *args++;
+		case 1:
+			if (!n--) break;
+			regs->bx = *args++;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+		}
+	else
+# endif
+		switch (i + n) {
+		case 6:
+			if (!n--) break;
+			regs->r9 = *args++;
+		case 5:
+			if (!n--) break;
+			regs->r8 = *args++;
+		case 4:
+			if (!n--) break;
+			regs->r10 = *args++;
+		case 3:
+			if (!n--) break;
+			regs->dx = *args++;
+		case 2:
+			if (!n--) break;
+			regs->si = *args++;
+		case 1:
+			if (!n--) break;
+			regs->di = *args++;
+		case 0:
+			if (!n--) break;
+		default:
+			BUG();
+		}
+}
+
+#endif	/* CONFIG_X86_32 */
+
+#endif	/* _ASM_SYSCALL_H */

diff --git a/include/asm-x86/syscalls.h b/include/asm-x86/syscalls.h
new file mode 100644
index 0000000..87803da
--- /dev/null
+++ b/include/asm-x86/syscalls.h

@@ -0,0 +1,93 @@
+/*
+ * syscalls.h - Linux syscall interfaces (arch-specific)
+ *
+ * Copyright (c) 2008 Jaswinder Singh
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifndef _ASM_X86_SYSCALLS_H
+#define _ASM_X86_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+
+/* Common in X86_32 and X86_64 */
+/* kernel/ioport.c */
+asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
+
+/* X86_32 only */
+#ifdef CONFIG_X86_32
+/* kernel/process_32.c */
+asmlinkage int sys_fork(struct pt_regs);
+asmlinkage int sys_clone(struct pt_regs);
+asmlinkage int sys_vfork(struct pt_regs);
+asmlinkage int sys_execve(struct pt_regs);
+
+/* kernel/signal_32.c */
+asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
+asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
+			     struct old_sigaction __user *);
+asmlinkage int sys_sigaltstack(unsigned long);
+asmlinkage unsigned long sys_sigreturn(unsigned long);
+asmlinkage int sys_rt_sigreturn(unsigned long);
+
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned long);
+
+/* kernel/ldt.c */
+asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+
+/* kernel/sys_i386_32.c */
+asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
+			  unsigned long, unsigned long, unsigned long);
+struct mmap_arg_struct;
+asmlinkage int old_mmap(struct mmap_arg_struct __user *);
+struct sel_arg_struct;
+asmlinkage int old_select(struct sel_arg_struct __user *);
+asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
+struct old_utsname;
+asmlinkage int sys_uname(struct old_utsname __user *);
+struct oldold_utsname;
+asmlinkage int sys_olduname(struct oldold_utsname __user *);
+
+/* kernel/tls.c */
+asmlinkage int sys_set_thread_area(struct user_desc __user *);
+asmlinkage int sys_get_thread_area(struct user_desc __user *);
+
+/* kernel/vm86_32.c */
+asmlinkage int sys_vm86old(struct pt_regs);
+asmlinkage int sys_vm86(struct pt_regs);
+
+#else /* CONFIG_X86_32 */
+
+/* X86_64 only */
+/* kernel/process_64.c */
+asmlinkage long sys_fork(struct pt_regs *);
+asmlinkage long sys_clone(unsigned long, unsigned long,
+			  void __user *, void __user *,
+			  struct pt_regs *);
+asmlinkage long sys_vfork(struct pt_regs *);
+asmlinkage long sys_execve(char __user *, char __user * __user *,
+			   char __user * __user *,
+			   struct pt_regs *);
+
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
+
+/* kernel/signal_64.c */
+asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
+				struct pt_regs *);
+asmlinkage long sys_rt_sigreturn(struct pt_regs *);
+
+/* kernel/sys_x86_64.c */
+asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
+			 unsigned long, unsigned long, unsigned long);
+struct new_utsname;
+asmlinkage long sys_uname(struct new_utsname __user *);
+
+#endif /* CONFIG_X86_32 */
+#endif /* _ASM_X86_SYSCALLS_H */

diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index 983ce37..34505dd 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_SYSTEM_H_
-#define _ASM_X86_SYSTEM_H_
+#ifndef ASM_X86__SYSTEM_H
+#define ASM_X86__SYSTEM_H
 
 #include <asm/asm.h>
 #include <asm/segment.h>
@@ -419,4 +419,4 @@
 	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
-#endif
+#endif /* ASM_X86__SYSTEM_H */

diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h
index 97fa251..5aedb8b 100644
--- a/include/asm-x86/system_64.h
+++ b/include/asm-x86/system_64.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_SYSTEM_H
-#define __ASM_SYSTEM_H
+#ifndef ASM_X86__SYSTEM_64_H
+#define ASM_X86__SYSTEM_64_H
 
 #include <asm/segment.h>
 #include <asm/cmpxchg.h>
@@ -19,4 +19,4 @@
 
 #include <linux/irqflags.h>
 
-#endif
+#endif /* ASM_X86__SYSTEM_64_H */

diff --git a/include/asm-x86/tce.h b/include/asm-x86/tce.h
index b1a4ea0..e7932d7 100644
--- a/include/asm-x86/tce.h
+++ b/include/asm-x86/tce.h

@@ -21,8 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#ifndef _ASM_X86_64_TCE_H
-#define _ASM_X86_64_TCE_H
+#ifndef ASM_X86__TCE_H
+#define ASM_X86__TCE_H
 
 extern unsigned int specified_table_size;
 struct iommu_table;
@@ -45,4 +45,4 @@
 extern void __init free_tce_table(void *tbl);
 extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
 
-#endif /* _ASM_X86_64_TCE_H */
+#endif /* ASM_X86__TCE_H */

diff --git a/include/asm-x86/termbits.h b/include/asm-x86/termbits.h
index af1b70e..3d00dc5 100644
--- a/include/asm-x86/termbits.h
+++ b/include/asm-x86/termbits.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TERMBITS_H
-#define _ASM_X86_TERMBITS_H
+#ifndef ASM_X86__TERMBITS_H
+#define ASM_X86__TERMBITS_H
 
 #include <linux/posix_types.h>
 
@@ -195,4 +195,4 @@
 #define	TCSADRAIN	1
 #define	TCSAFLUSH	2
 
-#endif /* _ASM_X86_TERMBITS_H */
+#endif /* ASM_X86__TERMBITS_H */

diff --git a/include/asm-x86/termios.h b/include/asm-x86/termios.h
index f729563..e235db2 100644
--- a/include/asm-x86/termios.h
+++ b/include/asm-x86/termios.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TERMIOS_H
-#define _ASM_X86_TERMIOS_H
+#ifndef ASM_X86__TERMIOS_H
+#define ASM_X86__TERMIOS_H
 
 #include <asm/termbits.h>
 #include <asm/ioctls.h>
@@ -110,4 +110,4 @@
 
 #endif	/* __KERNEL__ */
 
-#endif /* _ASM_X86_TERMIOS_H */
+#endif /* ASM_X86__TERMIOS_H */

diff --git a/include/asm-x86/therm_throt.h b/include/asm-x86/therm_throt.h
index 399bf60..1c7f57b 100644
--- a/include/asm-x86/therm_throt.h
+++ b/include/asm-x86/therm_throt.h

@@ -1,9 +1,9 @@
-#ifndef __ASM_I386_THERM_THROT_H__
-#define __ASM_I386_THERM_THROT_H__ 1
+#ifndef ASM_X86__THERM_THROT_H
+#define ASM_X86__THERM_THROT_H
 
 #include <asm/atomic.h>
 
 extern atomic_t therm_throt_en;
 int therm_throt_process(int curr);
 
-#endif /* __ASM_I386_THERM_THROT_H__ */
+#endif /* ASM_X86__THERM_THROT_H */

diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index da0a675..4db0066 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h

@@ -4,8 +4,8 @@
  * - Incorporating suggestions made by Linus Torvalds and Dave Miller
  */
 
-#ifndef _ASM_X86_THREAD_INFO_H
-#define _ASM_X86_THREAD_INFO_H
+#ifndef ASM_X86__THREAD_INFO_H
+#define ASM_X86__THREAD_INFO_H
 
 #include <linux/compiler.h>
 #include <asm/page.h>
@@ -71,6 +71,7 @@
  * Warning: layout of LSW is hardcoded in entry.S
  */
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
@@ -93,6 +94,7 @@
 #define TIF_BTS_TRACE_TS	27      /* record scheduling event timestamps */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
@@ -133,7 +135,7 @@
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY)
+	(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
@@ -258,4 +260,4 @@
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define arch_task_cache_init arch_task_cache_init
 #endif
-#endif /* _ASM_X86_THREAD_INFO_H */
+#endif /* ASM_X86__THREAD_INFO_H */

diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h
index a17fa47..3e724ee 100644
--- a/include/asm-x86/time.h
+++ b/include/asm-x86/time.h

@@ -1,5 +1,5 @@
-#ifndef _ASMX86_TIME_H
-#define _ASMX86_TIME_H
+#ifndef ASM_X86__TIME_H
+#define ASM_X86__TIME_H
 
 extern void hpet_time_init(void);
 
@@ -46,6 +46,8 @@
 
 #endif
 
+extern void time_init(void);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else /* !CONFIG_PARAVIRT */
@@ -58,4 +60,4 @@
 
 extern unsigned long __init calibrate_cpu(void);
 
-#endif
+#endif /* ASM_X86__TIME_H */

diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h
index fb2a4dd..d0babce 100644
--- a/include/asm-x86/timer.h
+++ b/include/asm-x86/timer.h

@@ -1,5 +1,5 @@
-#ifndef _ASMi386_TIMER_H
-#define _ASMi386_TIMER_H
+#ifndef ASM_X86__TIMER_H
+#define ASM_X86__TIMER_H
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/percpu.h>
@@ -9,9 +9,12 @@
 unsigned long long native_sched_clock(void);
 unsigned long native_calibrate_tsc(void);
 
+#ifdef CONFIG_X86_32
 extern int timer_ack;
-extern int no_timer_check;
 extern int recalibrate_cpu_khz(void);
+#endif /* CONFIG_X86_32 */
+
+extern int no_timer_check;
 
 #ifndef CONFIG_PARAVIRT
 #define calibrate_tsc() native_calibrate_tsc()
@@ -60,4 +63,4 @@
 	return ns;
 }
 
-#endif
+#endif /* ASM_X86__TIMER_H */

diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h
index 43e5a78..d1ce241 100644
--- a/include/asm-x86/timex.h
+++ b/include/asm-x86/timex.h

@@ -1,6 +1,6 @@
 /* x86 architecture timex specifications */
-#ifndef _ASM_X86_TIMEX_H
-#define _ASM_X86_TIMEX_H
+#ifndef ASM_X86__TIMEX_H
+#define ASM_X86__TIMEX_H
 
 #include <asm/processor.h>
 #include <asm/tsc.h>
@@ -16,4 +16,4 @@
 
 #define ARCH_HAS_READ_CURRENT_TIMER
 
-#endif
+#endif /* ASM_X86__TIMEX_H */

diff --git a/include/asm-x86/tlb.h b/include/asm-x86/tlb.h
index e4e9e2d..db36e9e 100644
--- a/include/asm-x86/tlb.h
+++ b/include/asm-x86/tlb.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TLB_H
-#define _ASM_X86_TLB_H
+#ifndef ASM_X86__TLB_H
+#define ASM_X86__TLB_H
 
 #define tlb_start_vma(tlb, vma) do { } while (0)
 #define tlb_end_vma(tlb, vma) do { } while (0)
@@ -8,4 +8,4 @@
 
 #include <asm-generic/tlb.h>
 
-#endif
+#endif /* ASM_X86__TLB_H */

diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h
index 35c76ce..ef68b76 100644
--- a/include/asm-x86/tlbflush.h
+++ b/include/asm-x86/tlbflush.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TLBFLUSH_H
-#define _ASM_X86_TLBFLUSH_H
+#ifndef ASM_X86__TLBFLUSH_H
+#define ASM_X86__TLBFLUSH_H
 
 #include <linux/mm.h>
 #include <linux/sched.h>
@@ -165,4 +165,4 @@
 	flush_tlb_all();
 }
 
-#endif /* _ASM_X86_TLBFLUSH_H */
+#endif /* ASM_X86__TLBFLUSH_H */

diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 90ac771..7eca9bc 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h

@@ -22,8 +22,8 @@
  *
  * Send feedback to <colpatch@us.ibm.com>
  */
-#ifndef _ASM_X86_TOPOLOGY_H
-#define _ASM_X86_TOPOLOGY_H
+#ifndef ASM_X86__TOPOLOGY_H
+#define ASM_X86__TOPOLOGY_H
 
 #ifdef CONFIG_X86_32
 # ifdef CONFIG_X86_HT
@@ -255,4 +255,4 @@
 }
 #endif
 
-#endif /* _ASM_X86_TOPOLOGY_H */
+#endif /* ASM_X86__TOPOLOGY_H */

diff --git a/include/asm-x86/trampoline.h b/include/asm-x86/trampoline.h
index b156b08..0406bbd 100644
--- a/include/asm-x86/trampoline.h
+++ b/include/asm-x86/trampoline.h

@@ -1,5 +1,5 @@
-#ifndef __TRAMPOLINE_HEADER
-#define __TRAMPOLINE_HEADER
+#ifndef ASM_X86__TRAMPOLINE_H
+#define ASM_X86__TRAMPOLINE_H
 
 #ifndef __ASSEMBLY__
 
@@ -18,4 +18,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* __TRAMPOLINE_HEADER */
+#endif /* ASM_X86__TRAMPOLINE_H */

diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h
index a4b65a7..2ccebc6 100644
--- a/include/asm-x86/traps.h
+++ b/include/asm-x86/traps.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TRAPS_H
-#define _ASM_X86_TRAPS_H
+#ifndef ASM_X86__TRAPS_H
+#define ASM_X86__TRAPS_H
 
 /* Common in X86_32 and X86_64 */
 asmlinkage void divide_error(void);
@@ -51,6 +51,8 @@
 unsigned long patch_espfix_desc(unsigned long, unsigned long);
 asmlinkage void math_emulate(long);
 
+void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+
 #else /* CONFIG_X86_32 */
 
 asmlinkage void double_fault(void);
@@ -62,5 +64,7 @@
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *);
 asmlinkage void do_spurious_interrupt_bug(struct pt_regs *);
 
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+
 #endif /* CONFIG_X86_32 */
-#endif /* _ASM_X86_TRAPS_H */
+#endif /* ASM_X86__TRAPS_H */

diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index cb6f6ee..ad0f5c4 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h

@@ -1,8 +1,8 @@
 /*
  * x86 TSC related functions
  */
-#ifndef _ASM_X86_TSC_H
-#define _ASM_X86_TSC_H
+#ifndef ASM_X86__TSC_H
+#define ASM_X86__TSC_H
 
 #include <asm/processor.h>
 
@@ -59,4 +59,4 @@
 
 extern int notsc_setup(char *);
 
-#endif
+#endif /* ASM_X86__TSC_H */

diff --git a/include/asm-x86/types.h b/include/asm-x86/types.h
index 1ac80cd..e78b52e 100644
--- a/include/asm-x86/types.h
+++ b/include/asm-x86/types.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_TYPES_H
-#define _ASM_X86_TYPES_H
+#ifndef ASM_X86__TYPES_H
+#define ASM_X86__TYPES_H
 
 #include <asm-generic/int-ll64.h>
 
@@ -33,4 +33,4 @@
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__TYPES_H */

diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 5f702d1..48ebc0a 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_UACCES_H_
-#define _ASM_UACCES_H_
+#ifndef ASM_X86__UACCESS_H
+#define ASM_X86__UACCESS_H
 /*
  * User space memory access functions
  */
@@ -450,5 +450,5 @@
 # include "uaccess_64.h"
 #endif
 
-#endif
+#endif /* ASM_X86__UACCESS_H */
 

diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 6fdef39..6b5b57d 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h

@@ -1,5 +1,5 @@
-#ifndef __i386_UACCESS_H
-#define __i386_UACCESS_H
+#ifndef ASM_X86__UACCESS_32_H
+#define ASM_X86__UACCESS_32_H
 
 /*
  * User space memory access functions
@@ -215,4 +215,4 @@
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 
-#endif /* __i386_UACCESS_H */
+#endif /* ASM_X86__UACCESS_32_H */

diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 45806d6..c96c1f5 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h

@@ -1,5 +1,5 @@
-#ifndef __X86_64_UACCESS_H
-#define __X86_64_UACCESS_H
+#ifndef ASM_X86__UACCESS_64_H
+#define ASM_X86__UACCESS_64_H
 
 /*
  * User space memory access functions
@@ -199,4 +199,4 @@
 unsigned long
 copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
 
-#endif /* __X86_64_UACCESS_H */
+#endif /* ASM_X86__UACCESS_64_H */

diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h
index 50a79f7..9948dd3 100644
--- a/include/asm-x86/ucontext.h
+++ b/include/asm-x86/ucontext.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UCONTEXT_H
-#define _ASM_X86_UCONTEXT_H
+#ifndef ASM_X86__UCONTEXT_H
+#define ASM_X86__UCONTEXT_H
 
 struct ucontext {
 	unsigned long	  uc_flags;
@@ -9,4 +9,4 @@
 	sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
 
-#endif /* _ASM_X86_UCONTEXT_H */
+#endif /* ASM_X86__UCONTEXT_H */

diff --git a/include/asm-x86/unaligned.h b/include/asm-x86/unaligned.h
index a7bd416..59dcdec 100644
--- a/include/asm-x86/unaligned.h
+++ b/include/asm-x86/unaligned.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNALIGNED_H
-#define _ASM_X86_UNALIGNED_H
+#ifndef ASM_X86__UNALIGNED_H
+#define ASM_X86__UNALIGNED_H
 
 /*
  * The x86 can do unaligned accesses itself.
@@ -11,4 +11,4 @@
 #define get_unaligned __get_unaligned_le
 #define put_unaligned __put_unaligned_le
 
-#endif /* _ASM_X86_UNALIGNED_H */
+#endif /* ASM_X86__UNALIGNED_H */

diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
index d739467..017f4a8 100644
--- a/include/asm-x86/unistd_32.h
+++ b/include/asm-x86/unistd_32.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_I386_UNISTD_H_
-#define _ASM_I386_UNISTD_H_
+#ifndef ASM_X86__UNISTD_32_H
+#define ASM_X86__UNISTD_32_H
 
 /*
  * This file contains the system call numbers.
@@ -376,4 +376,4 @@
 #endif
 
 #endif /* __KERNEL__ */
-#endif /* _ASM_I386_UNISTD_H_ */
+#endif /* ASM_X86__UNISTD_32_H */

diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index 3a341d7..ace83f1 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_UNISTD_H_
-#define _ASM_X86_64_UNISTD_H_
+#ifndef ASM_X86__UNISTD_64_H
+#define ASM_X86__UNISTD_64_H
 
 #ifndef __SYSCALL
 #define __SYSCALL(a, b)
@@ -690,4 +690,4 @@
 #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
 #endif	/* __KERNEL__ */
 
-#endif /* _ASM_X86_64_UNISTD_H_ */
+#endif /* ASM_X86__UNISTD_64_H */

diff --git a/include/asm-x86/unwind.h b/include/asm-x86/unwind.h
index 8b064bd..a215156 100644
--- a/include/asm-x86/unwind.h
+++ b/include/asm-x86/unwind.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_UNWIND_H
-#define _ASM_X86_UNWIND_H
+#ifndef ASM_X86__UNWIND_H
+#define ASM_X86__UNWIND_H
 
 #define UNW_PC(frame) ((void)(frame), 0UL)
 #define UNW_SP(frame) ((void)(frame), 0UL)
@@ -10,4 +10,4 @@
 	return 0;
 }
 
-#endif /* _ASM_X86_UNWIND_H */
+#endif /* ASM_X86__UNWIND_H */

diff --git a/include/asm-x86/user32.h b/include/asm-x86/user32.h
index a3d9100..aa66c18 100644
--- a/include/asm-x86/user32.h
+++ b/include/asm-x86/user32.h

@@ -1,5 +1,5 @@
-#ifndef USER32_H
-#define USER32_H 1
+#ifndef ASM_X86__USER32_H
+#define ASM_X86__USER32_H
 
 /* IA32 compatible user structures for ptrace.
  * These should be used for 32bit coredumps too. */
@@ -67,4 +67,4 @@
 };
 
 
-#endif
+#endif /* ASM_X86__USER32_H */

diff --git a/include/asm-x86/user_32.h b/include/asm-x86/user_32.h
index d6e51ed..e0fe2f5 100644
--- a/include/asm-x86/user_32.h
+++ b/include/asm-x86/user_32.h

@@ -1,5 +1,5 @@
-#ifndef _I386_USER_H
-#define _I386_USER_H
+#ifndef ASM_X86__USER_32_H
+#define ASM_X86__USER_32_H
 
 #include <asm/page.h>
 /* Core file format: The core file is written in such a way that gdb
@@ -128,4 +128,4 @@
 #define HOST_TEXT_START_ADDR (u.start_code)
 #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
 
-#endif /* _I386_USER_H */
+#endif /* ASM_X86__USER_32_H */

diff --git a/include/asm-x86/user_64.h b/include/asm-x86/user_64.h
index 6037b63..38b5799 100644
--- a/include/asm-x86/user_64.h
+++ b/include/asm-x86/user_64.h

@@ -1,5 +1,5 @@
-#ifndef _X86_64_USER_H
-#define _X86_64_USER_H
+#ifndef ASM_X86__USER_64_H
+#define ASM_X86__USER_64_H
 
 #include <asm/types.h>
 #include <asm/page.h>
@@ -134,4 +134,4 @@
 #define HOST_TEXT_START_ADDR (u.start_code)
 #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
 
-#endif /* _X86_64_USER_H */
+#endif /* ASM_X86__USER_64_H */

diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h
index aa73362..7cd6d7e 100644
--- a/include/asm-x86/uv/bios.h
+++ b/include/asm-x86/uv/bios.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_BIOS_H
-#define _ASM_X86_BIOS_H
+#ifndef ASM_X86__UV__BIOS_H
+#define ASM_X86__UV__BIOS_H
 
 /*
  * BIOS layer definitions.
@@ -65,4 +65,4 @@
 		   unsigned long *drift_info);
 extern const char *x86_bios_strerror(long status);
 
-#endif /* _ASM_X86_BIOS_H */
+#endif /* ASM_X86__UV__BIOS_H */

diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
index 610b6b3..77153fb 100644
--- a/include/asm-x86/uv/uv_bau.h
+++ b/include/asm-x86/uv/uv_bau.h

@@ -8,8 +8,8 @@
  * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_BAU__
-#define __ASM_X86_UV_BAU__
+#ifndef ASM_X86__UV__UV_BAU_H
+#define ASM_X86__UV__UV_BAU_H
 
 #include <linux/bitmap.h>
 #define BITSPERBYTE 8
@@ -329,4 +329,4 @@
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
-#endif /* __ASM_X86_UV_BAU__ */
+#endif /* ASM_X86__UV__UV_BAU_H */

diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h
index a4ef26e..bdb5b01 100644
--- a/include/asm-x86/uv/uv_hub.h
+++ b/include/asm-x86/uv/uv_hub.h

@@ -8,8 +8,8 @@
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_HUB_H__
-#define __ASM_X86_UV_HUB_H__
+#ifndef ASM_X86__UV__UV_HUB_H
+#define ASM_X86__UV__UV_HUB_H
 
 #include <linux/numa.h>
 #include <linux/percpu.h>
@@ -350,5 +350,5 @@
 	return uv_possible_blades;
 }
 
-#endif /* __ASM_X86_UV_HUB__ */
+#endif /* ASM_X86__UV__UV_HUB_H */
 

diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index 151fd7f..8b03d89 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h

@@ -8,8 +8,8 @@
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
-#ifndef __ASM_X86_UV_MMRS__
-#define __ASM_X86_UV_MMRS__
+#ifndef ASM_X86__UV__UV_MMRS_H
+#define ASM_X86__UV__UV_MMRS_H
 
 #define UV_MMR_ENABLE		(1UL << 63)
 
@@ -1292,4 +1292,4 @@
 };
 
 
-#endif /* __ASM_X86_UV_MMRS__ */
+#endif /* ASM_X86__UV__UV_MMRS_H */

diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h
index 8e18fb8..4ab3209 100644
--- a/include/asm-x86/vdso.h
+++ b/include/asm-x86/vdso.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_VDSO_H
-#define _ASM_X86_VDSO_H	1
+#ifndef ASM_X86__VDSO_H
+#define ASM_X86__VDSO_H
 
 #ifdef CONFIG_X86_64
 extern const char VDSO64_PRELINK[];
@@ -44,4 +44,4 @@
 extern const char vdso32_syscall_start, vdso32_syscall_end;
 extern const char vdso32_sysenter_start, vdso32_sysenter_end;
 
-#endif	/* asm-x86/vdso.h */
+#endif /* ASM_X86__VDSO_H */

diff --git a/include/asm-x86/vga.h b/include/asm-x86/vga.h
index 0ccf804..b9e493d 100644
--- a/include/asm-x86/vga.h
+++ b/include/asm-x86/vga.h

@@ -4,8 +4,8 @@
  *	(c) 1998 Martin Mares <mj@ucw.cz>
  */
 
-#ifndef _LINUX_ASM_VGA_H_
-#define _LINUX_ASM_VGA_H_
+#ifndef ASM_X86__VGA_H
+#define ASM_X86__VGA_H
 
 /*
  *	On the PC, we can just recalculate addresses and then
@@ -17,4 +17,4 @@
 #define vga_readb(x) (*(x))
 #define vga_writeb(x, y) (*(y) = (x))
 
-#endif
+#endif /* ASM_X86__VGA_H */

diff --git a/include/asm-x86/vgtod.h b/include/asm-x86/vgtod.h
index 3301f09..38fd133 100644
--- a/include/asm-x86/vgtod.h
+++ b/include/asm-x86/vgtod.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_VGTOD_H
-#define _ASM_VGTOD_H 1
+#ifndef ASM_X86__VGTOD_H
+#define ASM_X86__VGTOD_H
 
 #include <asm/vsyscall.h>
 #include <linux/clocksource.h>
@@ -26,4 +26,4 @@
 __section_vsyscall_gtod_data;
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
-#endif
+#endif /* ASM_X86__VGTOD_H */

diff --git a/include/asm-x86/visws/cobalt.h b/include/asm-x86/visws/cobalt.h
index 9952588..9627a8f 100644
--- a/include/asm-x86/visws/cobalt.h
+++ b/include/asm-x86/visws/cobalt.h

@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_COBALT_H
-#define __I386_SGI_COBALT_H
+#ifndef ASM_X86__VISWS__COBALT_H
+#define ASM_X86__VISWS__COBALT_H
 
 #include <asm/fixmap.h>
 
@@ -122,4 +122,4 @@
 
 extern char visws_board_rev;
 
-#endif /* __I386_SGI_COBALT_H */
+#endif /* ASM_X86__VISWS__COBALT_H */

diff --git a/include/asm-x86/visws/lithium.h b/include/asm-x86/visws/lithium.h
index dfcd4f0..b36d3b3 100644
--- a/include/asm-x86/visws/lithium.h
+++ b/include/asm-x86/visws/lithium.h

@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_LITHIUM_H
-#define __I386_SGI_LITHIUM_H
+#ifndef ASM_X86__VISWS__LITHIUM_H
+#define ASM_X86__VISWS__LITHIUM_H
 
 #include <asm/fixmap.h>
 
@@ -49,5 +49,5 @@
 	return *((volatile unsigned short *)(LI_PCIB_VADDR+reg));
 }
 
-#endif
+#endif /* ASM_X86__VISWS__LITHIUM_H */
 

diff --git a/include/asm-x86/visws/piix4.h b/include/asm-x86/visws/piix4.h
index 83ea4f4..61c9380 100644
--- a/include/asm-x86/visws/piix4.h
+++ b/include/asm-x86/visws/piix4.h

@@ -1,5 +1,5 @@
-#ifndef __I386_SGI_PIIX_H
-#define __I386_SGI_PIIX_H
+#ifndef ASM_X86__VISWS__PIIX4_H
+#define ASM_X86__VISWS__PIIX4_H
 
 /*
  * PIIX4 as used on SGI Visual Workstations
@@ -104,4 +104,4 @@
  */
 #define	PIIX_GPI_STPCLK		0x4	// STPCLK signal routed back in
 
-#endif
+#endif /* ASM_X86__VISWS__PIIX4_H */

diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h
index 5ce3513..998bd18 100644
--- a/include/asm-x86/vm86.h
+++ b/include/asm-x86/vm86.h

@@ -1,5 +1,5 @@
-#ifndef _LINUX_VM86_H
-#define _LINUX_VM86_H
+#ifndef ASM_X86__VM86_H
+#define ASM_X86__VM86_H
 
 /*
  * I'm guessing at the VIF/VIP flag usage, but hope that this is how
@@ -205,4 +205,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif
+#endif /* ASM_X86__VM86_H */

diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h
index c3118c3..b2d39e6 100644
--- a/include/asm-x86/vmi_time.h
+++ b/include/asm-x86/vmi_time.h

@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef __VMI_TIME_H
-#define __VMI_TIME_H
+#ifndef ASM_X86__VMI_TIME_H
+#define ASM_X86__VMI_TIME_H
 
 /*
  * Raw VMI call indices for timer functions
@@ -95,4 +95,4 @@
 
 #define CONFIG_VMI_ALARM_HZ	100
 
-#endif
+#endif /* ASM_X86__VMI_TIME_H */

diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h
index 6b66ff9..dcd4682 100644
--- a/include/asm-x86/vsyscall.h
+++ b/include/asm-x86/vsyscall.h

@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_64_VSYSCALL_H_
-#define _ASM_X86_64_VSYSCALL_H_
+#ifndef ASM_X86__VSYSCALL_H
+#define ASM_X86__VSYSCALL_H
 
 enum vsyscall_num {
 	__NR_vgettimeofday,
@@ -41,4 +41,4 @@
 
 #endif /* __KERNEL__ */
 
-#endif /* _ASM_X86_64_VSYSCALL_H_ */
+#endif /* ASM_X86__VSYSCALL_H */

diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
index 8ded747..8151f5b 100644
--- a/include/asm-x86/xen/events.h
+++ b/include/asm-x86/xen/events.h

@@ -1,5 +1,5 @@
-#ifndef __XEN_EVENTS_H
-#define __XEN_EVENTS_H
+#ifndef ASM_X86__XEN__EVENTS_H
+#define ASM_X86__XEN__EVENTS_H
 
 enum ipi_vector {
 	XEN_RESCHEDULE_VECTOR,
@@ -21,4 +21,4 @@
 	do_IRQ(regs);
 }
 
-#endif /* __XEN_EVENTS_H */
+#endif /* ASM_X86__XEN__EVENTS_H */

diff --git a/include/asm-x86/xen/grant_table.h b/include/asm-x86/xen/grant_table.h
index 2444d45..c4baab4 100644
--- a/include/asm-x86/xen/grant_table.h
+++ b/include/asm-x86/xen/grant_table.h

@@ -1,7 +1,7 @@
-#ifndef __XEN_GRANT_TABLE_H
-#define __XEN_GRANT_TABLE_H
+#ifndef ASM_X86__XEN__GRANT_TABLE_H
+#define ASM_X86__XEN__GRANT_TABLE_H
 
 #define xen_alloc_vm_area(size)	alloc_vm_area(size)
 #define xen_free_vm_area(area)	free_vm_area(area)
 
-#endif /* __XEN_GRANT_TABLE_H */
+#endif /* ASM_X86__XEN__GRANT_TABLE_H */

diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index 91cb7fd..44f4259 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h

@@ -30,8 +30,8 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef __HYPERCALL_H__
-#define __HYPERCALL_H__
+#ifndef ASM_X86__XEN__HYPERCALL_H
+#define ASM_X86__XEN__HYPERCALL_H
 
 #include <linux/errno.h>
 #include <linux/string.h>
@@ -524,4 +524,4 @@
 	mcl->args[1] = esp;
 }
 
-#endif /* __HYPERCALL_H__ */
+#endif /* ASM_X86__XEN__HYPERCALL_H */

diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h
index 04ee061..0ef3a88 100644
--- a/include/asm-x86/xen/hypervisor.h
+++ b/include/asm-x86/xen/hypervisor.h

@@ -30,8 +30,8 @@
  * IN THE SOFTWARE.
  */
 
-#ifndef __HYPERVISOR_H__
-#define __HYPERVISOR_H__
+#ifndef ASM_X86__XEN__HYPERVISOR_H
+#define ASM_X86__XEN__HYPERVISOR_H
 
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -69,4 +69,4 @@
 
 #define is_running_on_xen()	(xen_start_info ? 1 : 0)
 
-#endif /* __HYPERVISOR_H__ */
+#endif /* ASM_X86__XEN__HYPERVISOR_H */

diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h
index 9d810f2..d077bba 100644
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h

@@ -6,8 +6,8 @@
  * Copyright (c) 2004, K A Fraser
  */
 
-#ifndef __ASM_X86_XEN_INTERFACE_H
-#define __ASM_X86_XEN_INTERFACE_H
+#ifndef ASM_X86__XEN__INTERFACE_H
+#define ASM_X86__XEN__INTERFACE_H
 
 #ifdef __XEN__
 #define __DEFINE_GUEST_HANDLE(name, type) \
@@ -172,4 +172,4 @@
 #define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
 #endif
 
-#endif	/* __ASM_X86_XEN_INTERFACE_H */
+#endif /* ASM_X86__XEN__INTERFACE_H */

diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h
index d8ac41d..08167e1 100644
--- a/include/asm-x86/xen/interface_32.h
+++ b/include/asm-x86/xen/interface_32.h

@@ -6,8 +6,8 @@
  * Copyright (c) 2004, K A Fraser
  */
 
-#ifndef __ASM_X86_XEN_INTERFACE_32_H
-#define __ASM_X86_XEN_INTERFACE_32_H
+#ifndef ASM_X86__XEN__INTERFACE_32_H
+#define ASM_X86__XEN__INTERFACE_32_H
 
 
 /*
@@ -94,4 +94,4 @@
 #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
 #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
 
-#endif	/* __ASM_X86_XEN_INTERFACE_32_H */
+#endif /* ASM_X86__XEN__INTERFACE_32_H */

diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h
index 842266c..046c0f1 100644
--- a/include/asm-x86/xen/interface_64.h
+++ b/include/asm-x86/xen/interface_64.h

@@ -1,5 +1,5 @@
-#ifndef __ASM_X86_XEN_INTERFACE_64_H
-#define __ASM_X86_XEN_INTERFACE_64_H
+#ifndef ASM_X86__XEN__INTERFACE_64_H
+#define ASM_X86__XEN__INTERFACE_64_H
 
 /*
  * 64-bit segment selectors
@@ -156,4 +156,4 @@
 #endif /* !__ASSEMBLY__ */
 
 
-#endif	/* __ASM_X86_XEN_INTERFACE_64_H */
+#endif /* ASM_X86__XEN__INTERFACE_64_H */

diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index 7b3835d..c50185d 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h

@@ -1,5 +1,5 @@
-#ifndef __XEN_PAGE_H
-#define __XEN_PAGE_H
+#ifndef ASM_X86__XEN__PAGE_H
+#define ASM_X86__XEN__PAGE_H
 
 #include <linux/pfn.h>
 
@@ -162,4 +162,4 @@
 void make_lowmem_page_readonly(void *vaddr);
 void make_lowmem_page_readwrite(void *vaddr);
 
-#endif /* __XEN_PAGE_H */
+#endif /* ASM_X86__XEN__PAGE_H */

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index b68ec09..31474e8 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild

@@ -180,6 +180,7 @@
 unifdef-y += auto_fs.h
 unifdef-y += auxvec.h
 unifdef-y += binfmts.h
+unifdef-y += blktrace_api.h
 unifdef-y += capability.h
 unifdef-y += capi.h
 unifdef-y += cciss_ioctl.h

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 8a12d71..be00973 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h

@@ -88,6 +88,7 @@
 	ATA_ID_DLF		= 128,
 	ATA_ID_CSFO		= 129,
 	ATA_ID_CFA_POWER	= 160,
+	ATA_ID_ROT_SPEED	= 217,
 	ATA_ID_PIO4		= (1 << 1),
 
 	ATA_ID_SERNO_LEN	= 20,
@@ -667,6 +668,15 @@
 	return 0;
 }
 
+/* 1 iff version >= 7 and CFSSE word has bits 15:14 == 01b and bit 13 set */
+static inline int ata_id_has_unload(const u16 *id)
+{
+	const u16 cfsse = id[ATA_ID_CFSSE];
+
+	return ata_id_major_version(id) >= 7 &&
+	       (cfsse & 0xC000) == 0x4000 && (cfsse & (1 << 13)) != 0;
+}
+
 static inline int ata_id_current_chs_valid(const u16 *id)
 {
 	/* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command
@@ -691,6 +701,11 @@
 	return 0;
 }
 
+static inline int ata_id_is_ssd(const u16 *id)
+{
+	return id[ATA_ID_ROT_SPEED] == 0x01;	/* 1 iff word 217 is exactly 0x01 */
+}
+
 static inline int ata_drive_40wire(const u16 *dev_id)
 {
 	if (ata_id_is_sata(dev_id))

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 0933a14..ff5b4cf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h

@@ -26,21 +26,8 @@
 
 #ifdef CONFIG_BLOCK
 
-/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
 #include <asm/io.h>
 
-#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY)
-#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1))
-#define BIOVEC_VIRT_OVERSIZE(x)	((x) > BIO_VMERGE_MAX_SIZE)
-#else
-#define BIOVEC_VIRT_START_SIZE(x)	0
-#define BIOVEC_VIRT_OVERSIZE(x)		0
-#endif
-
-#ifndef BIO_VMERGE_BOUNDARY
-#define BIO_VMERGE_BOUNDARY	0
-#endif
-
 #define BIO_DEBUG
 
 #ifdef BIO_DEBUG
@@ -88,25 +75,14 @@
 	/* Number of segments in this BIO after
 	 * physical address coalescing is performed.
 	 */
-	unsigned short		bi_phys_segments;
-
-	/* Number of segments after physical and DMA remapping
-	 * hardware coalescing is performed.
-	 */
-	unsigned short		bi_hw_segments;
+	unsigned int		bi_phys_segments;
 
 	unsigned int		bi_size;	/* residual I/O count */
 
-	/*
-	 * To keep track of the max hw size, we account for the
-	 * sizes of the first and last virtually mergeable segments
-	 * in this bio
-	 */
-	unsigned int		bi_hw_front_size;
-	unsigned int		bi_hw_back_size;
-
 	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
 
+	unsigned int		bi_comp_cpu;	/* completion CPU */
+
 	struct bio_vec		*bi_io_vec;	/* the actual vec list */
 
 	bio_end_io_t		*bi_end_io;
@@ -126,11 +102,14 @@
 #define BIO_UPTODATE	0	/* ok after I/O completion */
 #define BIO_RW_BLOCK	1	/* RW_AHEAD set, and read/write would block */
 #define BIO_EOF		2	/* out-out-bounds error */
-#define BIO_SEG_VALID	3	/* nr_hw_seg valid */
+#define BIO_SEG_VALID	3	/* bi_phys_segments valid */
 #define BIO_CLONED	4	/* doesn't own data */
 #define BIO_BOUNCED	5	/* bio is a bounce bio */
 #define BIO_USER_MAPPED 6	/* contains user pages */
 #define BIO_EOPNOTSUPP	7	/* not supported */
+#define BIO_CPU_AFFINE	8	/* complete bio on same CPU as submitted */
+#define BIO_NULL_MAPPED 9	/* contains invalid user pages */
+#define BIO_FS_INTEGRITY 10	/* fs owns integrity data, not block layer */
 #define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
 /*
@@ -144,18 +123,31 @@
 /*
  * bio bi_rw flags
  *
- * bit 0 -- read (not set) or write (set)
+ * bit 0 -- data direction
+ *	If not set, bio is a read from device. If set, it's a write to device.
  * bit 1 -- rw-ahead when set
  * bit 2 -- barrier
+ *	Insert a serialization point in the IO queue, forcing previously
+ *	submitted IO to be completed before this one is issued.
  * bit 3 -- fail fast, don't want low level driver retries
  * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
+ *	Note that this does NOT indicate that the IO itself is sync, just
+ *	that the block layer will not postpone issue of this IO by plugging.
+ * bit 5 -- metadata request
+ *	Used for tracing to differentiate metadata and data IO. May also
+ *	get some preferential treatment in the IO scheduler
+ * bit 6 -- discard sectors
+ *	Informs the lower level device that this range of sectors is no longer
+ *	used by the file system and may thus be freed by the device. Used
+ *	for flash based storage.
  */
-#define BIO_RW		0
-#define BIO_RW_AHEAD	1
+#define BIO_RW		0	/* Must match RW in req flags (blkdev.h) */
+#define BIO_RW_AHEAD	1	/* Must match FAILFAST in req flags */
 #define BIO_RW_BARRIER	2
 #define BIO_RW_FAILFAST	3
 #define BIO_RW_SYNC	4
 #define BIO_RW_META	5
+#define BIO_RW_DISCARD	6
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -185,14 +177,15 @@
 #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
 #define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
-#define bio_empty_barrier(bio)	(bio_barrier(bio) && !(bio)->bi_size)
+#define bio_discard(bio)	((bio)->bi_rw & (1 << BIO_RW_DISCARD))
+#define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
 
 static inline unsigned int bio_cur_sectors(struct bio *bio)
 {
 	if (bio->bi_vcnt)
 		return bio_iovec(bio)->bv_len >> 9;
-
-	return 0;
+	else /* dataless requests such as discard */
+		return bio->bi_size >> 9;
 }
 
 static inline void *bio_data(struct bio *bio)
@@ -236,8 +229,6 @@
 	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
 #endif
 
-#define BIOVEC_VIRT_MERGEABLE(vec1, vec2)	\
-	((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
 	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
@@ -319,15 +310,14 @@
 	atomic_t			cnt;
 	int				error;
 };
-extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool,
-				  int first_sectors);
-extern mempool_t *bio_split_pool;
+extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
 extern void bio_pair_release(struct bio_pair *dbio);
 
 extern struct bio_set *bioset_create(int, int);
 extern void bioset_free(struct bio_set *);
 
 extern struct bio *bio_alloc(gfp_t, int);
+extern struct bio *bio_kmalloc(gfp_t, int);
 extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
 extern void bio_put(struct bio *);
 extern void bio_free(struct bio *, struct bio_set *);
@@ -335,7 +325,6 @@
 extern void bio_endio(struct bio *, int);
 struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
-extern int bio_hw_segments(struct request_queue *, struct bio *);
 
 extern void __bio_clone(struct bio *, struct bio *);
 extern struct bio *bio_clone(struct bio *, gfp_t);
@@ -346,12 +335,14 @@
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
 extern int bio_get_nr_vecs(struct block_device *);
+extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int);
 extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
-				unsigned long, unsigned int, int);
+				unsigned long, unsigned int, int, gfp_t);
 struct sg_iovec;
+struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
 				    struct block_device *,
-				    struct sg_iovec *, int, int);
+				    struct sg_iovec *, int, int, gfp_t);
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
 				gfp_t);
@@ -359,15 +350,25 @@
 				 gfp_t, int);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
-extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
-extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *,
-				     int, int);
+extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
+				 unsigned long, unsigned int, int, gfp_t);
+extern struct bio *bio_copy_user_iov(struct request_queue *,
+				     struct rq_map_data *, struct sg_iovec *,
+				     int, int, gfp_t);
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);
 extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 /*
+ * Allow queuer to specify a completion CPU for this bio
+ */
+static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
+{
+	bio->bi_comp_cpu = cpu;	/* only recorded here; cpu is not range-checked and bio must be non-NULL */
+}
+
+/*
  * bio_set is used to allow other portions of the IO system to
  * allocate their own private memory pools for bio and iovec structures.
  * These memory pools in turn all allocate from the bio_slab
@@ -445,6 +446,14 @@
 	__bio_kmap_irq((bio), (bio)->bi_idx, (flags))
 #define bio_kunmap_irq(buf,flags)	__bio_kunmap_irq(buf, flags)
 
+/*
+ * Check whether this bio carries any data or not. A NULL bio is allowed.
+ */
+static inline int bio_has_data(struct bio *bio)
+{
+	return bio && bio->bi_io_vec != NULL;	/* decided by bi_io_vec alone; bi_size is not consulted */
+}
+
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
 #define bip_vec_idx(bip, idx)	(&(bip->bip_vec[(idx)]))
@@ -458,14 +467,7 @@
 #define bip_for_each_vec(bvl, bip, i)					\
 	__bip_for_each_vec(bvl, bip, i, (bip)->bip_idx)
 
-static inline int bio_integrity(struct bio *bio)
-{
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-	return bio->bi_integrity != NULL;
-#else
-	return 0;
-#endif
-}
+#define bio_integrity(bio) ((bio)->bi_integrity != NULL)	/* nonzero iff bi_integrity is set; arg parenthesized so expressions like (p + 1) expand correctly */
 
 extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 53ea933..a92d9e4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h

@@ -16,7 +16,9 @@
 #include <linux/bio.h>
 #include <linux/module.h>
 #include <linux/stringify.h>
+#include <linux/gfp.h>
 #include <linux/bsg.h>
+#include <linux/smp.h>
 
 #include <asm/scatterlist.h>
 
@@ -54,7 +56,6 @@
 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
 	REQ_TYPE_PM_RESUME,		/* resume request */
 	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
-	REQ_TYPE_FLUSH,			/* flush request */
 	REQ_TYPE_SPECIAL,		/* driver defined type */
 	REQ_TYPE_LINUX_BLOCK,		/* generic block layer message */
 	/*
@@ -76,19 +77,18 @@
  *
  */
 enum {
-	/*
-	 * just examples for now
-	 */
 	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
-	REQ_LB_OP_FLUSH = 0x41,		/* flush device */
+	REQ_LB_OP_FLUSH = 0x41,		/* flush request */
+	REQ_LB_OP_DISCARD = 0x42,	/* discard sectors */
 };
 
 /*
- * request type modified bits. first three bits match BIO_RW* bits, important
+ * request type modified bits. first two bits match BIO_RW* bits, important
  */
 enum rq_flag_bits {
 	__REQ_RW,		/* not set, read. set, write */
 	__REQ_FAILFAST,		/* no low level driver retries */
+	__REQ_DISCARD,		/* request to discard sectors */
 	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
 	__REQ_HARDBARRIER,	/* may not be passed by drive either */
@@ -111,6 +111,7 @@
 };
 
 #define REQ_RW		(1 << __REQ_RW)
+#define REQ_DISCARD	(1 << __REQ_DISCARD)
 #define REQ_FAILFAST	(1 << __REQ_FAILFAST)
 #define REQ_SORTED	(1 << __REQ_SORTED)
 #define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
@@ -140,12 +141,14 @@
  */
 struct request {
 	struct list_head queuelist;
-	struct list_head donelist;
+	struct call_single_data csd;
+	int cpu;
 
 	struct request_queue *q;
 
 	unsigned int cmd_flags;
 	enum rq_cmd_type_bits cmd_type;
+	unsigned long atomic_flags;
 
 	/* Maintain bio traversal state for part by part I/O submission.
 	 * hard_* are block layer internals, no driver should touch them!
@@ -190,13 +193,6 @@
 	 */
 	unsigned short nr_phys_segments;
 
-	/* Number of scatter-gather addr+len pairs after
-	 * physical and DMA remapping hardware coalescing is performed.
-	 * This is the number of scatter-gather entries the driver
-	 * will actually have to deal with after DMA mapping is done.
-	 */
-	unsigned short nr_hw_segments;
-
 	unsigned short ioprio;
 
 	void *special;
@@ -220,6 +216,8 @@
 	void *data;
 	void *sense;
 
+	unsigned long deadline;
+	struct list_head timeout_list;
 	unsigned int timeout;
 	int retries;
 
@@ -233,6 +231,11 @@
 	struct request *next_rq;
 };
 
+static inline unsigned short req_get_ioprio(struct request *req)
+{
+	return req->ioprio;	/* plain read of the field; req must be non-NULL */
+}
+
 /*
  * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
  * requests. Some step values could eventually be made generic.
@@ -252,6 +255,7 @@
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
+typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -265,6 +269,15 @@
 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
+typedef int (lld_busy_fn) (struct request_queue *q);
+
+enum blk_eh_timer_return {
+	BLK_EH_NOT_HANDLED,
+	BLK_EH_HANDLED,
+	BLK_EH_RESET_TIMER,
+};
+
+typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
 
 enum blk_queue_state {
 	Queue_down,
@@ -307,10 +320,13 @@
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
+	prepare_discard_fn	*prepare_discard_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
+	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
+	lld_busy_fn		*lld_busy_fn;
 
 	/*
 	 * Dispatch queue sorting
@@ -385,6 +401,10 @@
 	unsigned int		nr_sorted;
 	unsigned int		in_flight;
 
+	unsigned int		rq_timeout;
+	struct timer_list	timeout;
+	struct list_head	timeout_list;
+
 	/*
 	 * sg stuff
 	 */
@@ -421,6 +441,10 @@
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES    10	/* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP   11	/* force complete on same CPU */
+#define QUEUE_FLAG_FAIL_IO     12	/* fake timeout */
+#define QUEUE_FLAG_STACKABLE   13	/* supports request stacking */
+#define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
 
 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -526,7 +550,10 @@
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
+#define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
+#define blk_queue_stackable(q)	\
+	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
 
 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -536,16 +563,18 @@
 #define blk_noretry_request(rq)	((rq)->cmd_flags & REQ_FAILFAST)
 #define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)
 
-#define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
+#define blk_account_rq(rq)	(blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) 
 
 #define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
 #define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
 #define blk_pm_request(rq)	\
 	(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
 
+#define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
 #define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
 #define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
+#define blk_discard_rq(rq)	((rq)->cmd_flags & REQ_DISCARD)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
 #define blk_empty_barrier(rq)	(blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
 /* rq->queuelist of dequeued request must be list_empty() */
@@ -592,7 +621,8 @@
 #define RQ_NOMERGE_FLAGS	\
 	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)	\
-	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
+	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
+	 (blk_discard_rq(rq) || blk_fs_request((rq))))
 
 /*
  * q->prep_rq_fn return values
@@ -637,6 +667,12 @@
 }
 #endif /* CONFIG_MMU */
 
+struct rq_map_data {
+	struct page **pages;
+	int page_order;
+	int nr_entries;
+};
+
 struct req_iterator {
 	int i;
 	struct bio *bio;
@@ -664,6 +700,10 @@
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
+extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
+extern int blk_lld_busy(struct request_queue *q);
+extern int blk_insert_cloned_request(struct request_queue *q,
+				     struct request *rq);
 extern void blk_plug_device(struct request_queue *);
 extern void blk_plug_device_unlocked(struct request_queue *);
 extern int blk_remove_plug(struct request_queue *);
@@ -705,11 +745,14 @@
 extern void __blk_run_queue(struct request_queue *);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_start_queueing(struct request_queue *);
-extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long);
+extern int blk_rq_map_user(struct request_queue *, struct request *,
+			   struct rq_map_data *, void __user *, unsigned long,
+			   gfp_t);
 extern int blk_rq_unmap_user(struct bio *);
 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
 extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
-			       struct sg_iovec *, int, unsigned int);
+			       struct rq_map_data *, struct sg_iovec *, int,
+			       unsigned int, gfp_t);
 extern int blk_execute_rq(struct request_queue *, struct gendisk *,
 			  struct request *, int);
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
@@ -750,12 +793,15 @@
 extern int blk_end_bidi_request(struct request *rq, int error,
 				unsigned int nr_bytes, unsigned int bidi_bytes);
 extern void end_request(struct request *, int);
-extern void end_queued_request(struct request *, int);
-extern void end_dequeued_request(struct request *, int);
 extern int blk_end_request_callback(struct request *rq, int error,
 				unsigned int nr_bytes,
 				int (drv_callback)(struct request *));
 extern void blk_complete_request(struct request *);
+extern void __blk_complete_request(struct request *);
+extern void blk_abort_request(struct request *);
+extern void blk_abort_queue(struct request_queue *);
+extern void blk_update_request(struct request *rq, int error,
+			       unsigned int nr_bytes);
 
 /*
  * blk_end_request() takes bytes instead of sectors as a complete size.
@@ -790,12 +836,16 @@
 extern int blk_queue_dma_drain(struct request_queue *q,
 			       dma_drain_needed_fn *dma_drain_needed,
 			       void *buf, unsigned int size);
+extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
+extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
+extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
+extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
 extern int blk_do_ordered(struct request_queue *, struct request **);
@@ -837,6 +887,16 @@
 }
 
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
+extern int blkdev_issue_discard(struct block_device *,
+				sector_t sector, sector_t nr_sects, gfp_t);
+
+static inline int sb_issue_discard(struct super_block *sb,
+				   sector_t block, sector_t nr_blocks)
+{	/* Scales a block range by the sb block size and passes it to blkdev_issue_discard(); returns its result. */
+	block <<= (sb->s_blocksize_bits - 9);	/* block number -> units of 2^9 bytes (the callee's "sector" argument) */
+	nr_blocks <<= (sb->s_blocksize_bits - 9);	/* NOTE(review): assumes s_blocksize_bits >= 9 -- confirm */
+	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL);
+}
 
 /*
 * command filter functions
@@ -874,6 +934,13 @@
 	return q ? q->dma_alignment : 511;
 }
 
+static inline int blk_rq_aligned(struct request_queue *q, void *addr,
+				 unsigned int len)
+{	/* Returns 1 if neither addr nor len has any bit of the combined mask set, else 0. */
+	unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;	/* OR of both masks; q is dereferenced, so it must be non-NULL */
+	return !((unsigned long)addr & alignment) && !(len & alignment);
+}
+
 /* assumes size > 256 */
 static inline unsigned int blksize_bits(unsigned int size)
 {
@@ -900,7 +967,7 @@
 }
 
 struct work_struct;
-int kblockd_schedule_work(struct work_struct *work);
+int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
 void kblockd_flush_work(struct work_struct *work);
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
@@ -945,49 +1012,19 @@
 
 extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
 extern void blk_integrity_unregister(struct gendisk *);
-extern int blk_integrity_compare(struct block_device *, struct block_device *);
+extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
 extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
 extern int blk_rq_count_integrity_sg(struct request *);
 
-static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
-{
-	if (bi)
-		return bi->tuple_size;
-
-	return 0;
-}
-
-static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
+static inline
+struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
 {
 	return bdev->bd_disk->integrity;
 }
 
-static inline unsigned int bdev_get_tag_size(struct block_device *bdev)
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
 {
-	struct blk_integrity *bi = bdev_get_integrity(bdev);
-
-	if (bi)
-		return bi->tag_size;
-
-	return 0;
-}
-
-static inline int bdev_integrity_enabled(struct block_device *bdev, int rw)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bdev);
-
-	if (bi == NULL)
-		return 0;
-
-	if (rw == READ && bi->verify_fn != NULL &&
-	    (bi->flags & INTEGRITY_FLAG_READ))
-		return 1;
-
-	if (rw == WRITE && bi->generate_fn != NULL &&
-	    (bi->flags & INTEGRITY_FLAG_WRITE))
-		return 1;
-
-	return 0;
+	return disk->integrity;
 }
 
 static inline int blk_integrity_rq(struct request *rq)
@@ -1004,7 +1041,7 @@
 #define blk_rq_count_integrity_sg(a)		(0)
 #define blk_rq_map_integrity_sg(a, b)		(0)
 #define bdev_get_integrity(a)			(0)
-#define bdev_get_tag_size(a)			(0)
+#define blk_get_integrity(a)			(0)
 #define blk_integrity_compare(a, b)		(0)
 #define blk_integrity_register(a, b)		(0)
 #define blk_integrity_unregister(a)		do { } while (0);

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d084b8d..3a31eb5 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h

@@ -1,8 +1,10 @@
 #ifndef BLKTRACE_H
 #define BLKTRACE_H
 
+#ifdef __KERNEL__
 #include <linux/blkdev.h>
 #include <linux/relay.h>
+#endif
 
 /*
  * Trace categories
@@ -21,6 +23,7 @@
 	BLK_TC_NOTIFY	= 1 << 10,	/* special message */
 	BLK_TC_AHEAD	= 1 << 11,	/* readahead */
 	BLK_TC_META	= 1 << 12,	/* metadata */
+	BLK_TC_DISCARD	= 1 << 13,	/* discard requests */
 
 	BLK_TC_END	= 1 << 15,	/* only 16-bits, reminder */
 };
@@ -47,6 +50,7 @@
 	__BLK_TA_SPLIT,			/* bio was split */
 	__BLK_TA_BOUNCE,		/* bio was bounced */
 	__BLK_TA_REMAP,			/* bio was remapped */
+	__BLK_TA_ABORT,			/* request aborted */
 };
 
 /*
@@ -77,6 +81,7 @@
 #define BLK_TA_SPLIT		(__BLK_TA_SPLIT)
 #define BLK_TA_BOUNCE		(__BLK_TA_BOUNCE)
 #define BLK_TA_REMAP		(__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_ABORT		(__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
 
 #define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
 #define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
@@ -89,17 +94,17 @@
  * The trace itself
  */
 struct blk_io_trace {
-	u32 magic;		/* MAGIC << 8 | version */
-	u32 sequence;		/* event number */
-	u64 time;		/* in microseconds */
-	u64 sector;		/* disk offset */
-	u32 bytes;		/* transfer length */
-	u32 action;		/* what happened */
-	u32 pid;		/* who did it */
-	u32 device;		/* device number */
-	u32 cpu;		/* on what cpu did it happen */
-	u16 error;		/* completion error */
-	u16 pdu_len;		/* length of data after this trace */
+	__u32 magic;		/* MAGIC << 8 | version */
+	__u32 sequence;		/* event number */
+	__u64 time;		/* in microseconds */
+	__u64 sector;		/* disk offset */
+	__u32 bytes;		/* transfer length */
+	__u32 action;		/* what happened */
+	__u32 pid;		/* who did it */
+	__u32 device;		/* device number */
+	__u32 cpu;		/* on what cpu did it happen */
+	__u16 error;		/* completion error */
+	__u16 pdu_len;		/* length of data after this trace */
 };
 
 /*
@@ -117,6 +122,23 @@
 	Blktrace_stopped,
 };
 
+#define BLKTRACE_BDEV_SIZE	32
+
+/*
+ * User setup structure passed with BLKTRACESTART
+ */
+struct blk_user_trace_setup {
+	char name[BLKTRACE_BDEV_SIZE];	/* output */
+	__u16 act_mask;			/* input */
+	__u32 buf_size;			/* input */
+	__u32 buf_nr;			/* input */
+	__u64 start_lba;
+	__u64 end_lba;
+	__u32 pid;
+};
+
+#ifdef __KERNEL__
+#if defined(CONFIG_BLK_DEV_IO_TRACE)
 struct blk_trace {
 	int trace_state;
 	struct rchan *rchan;
@@ -133,21 +155,6 @@
 	atomic_t dropped;
 };
 
-/*
- * User setup structure passed with BLKTRACESTART
- */
-struct blk_user_trace_setup {
-	char name[BDEVNAME_SIZE];	/* output */
-	u16 act_mask;			/* input */
-	u32 buf_size;			/* input */
-	u32 buf_nr;			/* input */
-	u64 start_lba;
-	u64 end_lba;
-	u32 pid;
-};
-
-#ifdef __KERNEL__
-#if defined(CONFIG_BLK_DEV_IO_TRACE)
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
 extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
@@ -195,6 +202,9 @@
 	if (likely(!bt))
 		return;
 
+	if (blk_discard_rq(rq))
+		rw |= (1 << BIO_RW_DISCARD);
+
 	if (blk_pc_request(rq)) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
 		__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);

diff --git a/include/linux/device.h b/include/linux/device.h
index 4d8372d..246937c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h

@@ -199,6 +199,11 @@
 	struct class_private *p;
 };
 
+struct class_dev_iter {
+	struct klist_iter		ki;
+	const struct device_type	*type;
+};
+
 extern struct kobject *sysfs_dev_block_kobj;
 extern struct kobject *sysfs_dev_char_kobj;
 extern int __must_check __class_register(struct class *class,
@@ -213,6 +218,13 @@
 	__class_register(class, &__key);	\
 })
 
+extern void class_dev_iter_init(struct class_dev_iter *iter,
+				struct class *class,
+				struct device *start,
+				const struct device_type *type);
+extern struct device *class_dev_iter_next(struct class_dev_iter *iter);
+extern void class_dev_iter_exit(struct class_dev_iter *iter);
+
 extern int class_for_each_device(struct class *class, struct device *start,
 				 void *data,
 				 int (*fn)(struct device *dev, void *data));
@@ -396,7 +408,7 @@
 	spinlock_t		devres_lock;
 	struct list_head	devres_head;
 
-	struct list_head	node;
+	struct klist_node	knode_class;
 	struct class		*class;
 	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
 	struct attribute_group	**groups;	/* optional groups */

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 639624b..92f6f63 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h

@@ -112,6 +112,7 @@
 extern int elv_register_queue(struct request_queue *q);
 extern void elv_unregister_queue(struct request_queue *q);
 extern int elv_may_queue(struct request_queue *, int);
+extern void elv_abort_queue(struct request_queue *);
 extern void elv_completed_request(struct request_queue *, struct request *);
 extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
 extern void elv_put_request(struct request_queue *, struct request *);
@@ -173,15 +174,15 @@
 #define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 /*
- * Hack to reuse the donelist list_head as the fifo time holder while
+ * Hack to reuse the csd.list list_head as the fifo time holder while
  * the request is in the io scheduler. Saves an unsigned long in rq.
  */
-#define rq_fifo_time(rq)	((unsigned long) (rq)->donelist.next)
-#define rq_set_fifo_time(rq,exp)	((rq)->donelist.next = (void *) (exp))
+#define rq_fifo_time(rq)	((unsigned long) (rq)->csd.list.next)
+#define rq_set_fifo_time(rq,exp)	((rq)->csd.list.next = (void *) (exp))
 #define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 #define rq_fifo_clear(rq)	do {		\
 	list_del_init(&(rq)->queuelist);	\
-	INIT_LIST_HEAD(&(rq)->donelist);	\
+	INIT_LIST_HEAD(&(rq)->csd.list);	\
 	} while (0)
 
 /*

diff --git a/include/linux/fd.h b/include/linux/fd.h
index b6bd41d..f5d194a 100644
--- a/include/linux/fd.h
+++ b/include/linux/fd.h

@@ -15,10 +15,16 @@
 			sect,		/* sectors per track */
 			head,		/* nr of heads */
 			track,		/* nr of tracks */
-			stretch;	/* !=0 means double track steps */
+			stretch;	/* bit 0 !=0 means double track steps */
+					/* bit 1 != 0 means swap sides */
+					/* bits 2..9 give the first sector */
+					/*  number (the LSB is flipped) */
 #define FD_STRETCH 1
 #define FD_SWAPSIDES 2
 #define FD_ZEROBASED 4
+#define FD_SECTBASEMASK 0x3FC
+#define FD_MKSECTBASE(s) (((s) ^ 1) << 2)
+#define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1)
 
 	unsigned char	gap,		/* gap1 size */
 

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 580b513..32477e8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h

@@ -86,7 +86,9 @@
 #define READ_META	(READ | (1 << BIO_RW_META))
 #define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 #define SWRITE_SYNC	(SWRITE | (1 << BIO_RW_SYNC))
-#define WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
+#define WRITE_BARRIER	(WRITE | (1 << BIO_RW_BARRIER))
+#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
+#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
 
 #define SEL_IN		1
 #define SEL_OUT		2
@@ -222,6 +224,7 @@
 #define BLKTRACESTART _IO(0x12,116)
 #define BLKTRACESTOP _IO(0x12,117)
 #define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
@@ -1682,6 +1685,7 @@
 
 /* fs/block_dev.c */
 #define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
+#define BDEVT_SIZE	10	/* Largest string for MAJ:MIN for blkdev */
 
 #ifdef CONFIG_BLOCK
 #define BLKDEV_MAJOR_HASH_SIZE	255
@@ -1718,6 +1722,9 @@
  */
 #define bio_data_dir(bio)	((bio)->bi_rw & 1)
 
+extern void check_disk_size_change(struct gendisk *disk,
+				   struct block_device *bdev);
+extern int revalidate_disk(struct gendisk *);
 extern int check_disk_change(struct block_device *);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index be4f5e5..206cdf9 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h

@@ -11,12 +11,15 @@
 
 #include <linux/types.h>
 #include <linux/kdev_t.h>
+#include <linux/rcupdate.h>
 
 #ifdef CONFIG_BLOCK
 
-#define kobj_to_dev(k) container_of(k, struct device, kobj)
-#define dev_to_disk(device) container_of(device, struct gendisk, dev)
-#define dev_to_part(device) container_of(device, struct hd_struct, dev)
+#define kobj_to_dev(k)		container_of((k), struct device, kobj)
+#define dev_to_disk(device)	container_of((device), struct gendisk, part0.__dev)
+#define dev_to_part(device)	container_of((device), struct hd_struct, __dev)
+#define disk_to_dev(disk)	(&(disk)->part0.__dev)
+#define part_to_dev(part)	(&((part)->__dev))
 
 extern struct device_type part_type;
 extern struct kobject *block_depr;
@@ -55,6 +58,9 @@
 	UNIXWARE_PARTITION = 0x63,	/* Same as GNU_HURD and SCO Unix */
 };
 
+#define DISK_MAX_PARTS			256
+#define DISK_NAME_LEN			32
+
 #include <linux/major.h>
 #include <linux/device.h>
 #include <linux/smp.h>
@@ -87,7 +93,7 @@
 struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
-	struct device dev;
+	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -100,6 +106,7 @@
 #else
 	struct disk_stats dkstats;
 #endif
+	struct rcu_head rcu_head;
 };
 
 #define GENHD_FL_REMOVABLE			1
@@ -108,100 +115,148 @@
 #define GENHD_FL_CD				8
 #define GENHD_FL_UP				16
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
-#define GENHD_FL_FAIL				64
+#define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
+
+#define BLK_SCSI_MAX_CMDS	(256)
+#define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
+
+struct blk_scsi_cmd_filter {
+	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+	unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+	struct kobject kobj;
+};
+
+struct disk_part_tbl {
+	struct rcu_head rcu_head;
+	int len;
+	struct hd_struct *part[];
+};
 
 struct gendisk {
+	/* major, first_minor and minors are input parameters only,
+	 * don't use directly.  Use disk_devt() and disk_max_parts().
+	 */
 	int major;			/* major number of driver */
 	int first_minor;
 	int minors;                     /* maximum number of minors, =1 for
                                          * disks that can't be partitioned. */
-	char disk_name[32];		/* name of major driver */
-	struct hd_struct **part;	/* [indexed by minor] */
+
+	char disk_name[DISK_NAME_LEN];	/* name of major driver */
+
+	/* Array of pointers to partitions indexed by partno.
+	 * Protected with matching bdev lock but stat and other
+	 * non-critical accesses use RCU.  Always access through
+	 * helpers.
+	 */
+	struct disk_part_tbl *part_tbl;
+	struct hd_struct part0;
+
 	struct block_device_operations *fops;
 	struct request_queue *queue;
 	void *private_data;
-	sector_t capacity;
 
 	int flags;
 	struct device *driverfs_dev;  // FIXME: remove
-	struct device dev;
-	struct kobject *holder_dir;
 	struct kobject *slave_dir;
 
 	struct timer_rand_state *random;
-	int policy;
 
 	atomic_t sync_io;		/* RAID */
-	unsigned long stamp;
-	int in_flight;
-#ifdef	CONFIG_SMP
-	struct disk_stats *dkstats;
-#else
-	struct disk_stats dkstats;
-#endif
 	struct work_struct async_notify;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity *integrity;
 #endif
+	int node_id;
 };
 
-/* 
- * Macros to operate on percpu disk statistics:
- *
- * The __ variants should only be called in critical sections. The full
- * variants disable/enable preemption.
- */
-static inline struct hd_struct *get_part(struct gendisk *gendiskp,
-					 sector_t sector)
+static inline struct gendisk *part_to_disk(struct hd_struct *part)
 {
-	struct hd_struct *part;
-	int i;
-	for (i = 0; i < gendiskp->minors - 1; i++) {
-		part = gendiskp->part[i];
-		if (part && part->start_sect <= sector
-		    && sector < part->start_sect + part->nr_sects)
-			return part;
+	if (likely(part)) {
+		if (part->partno)
+			return dev_to_disk(part_to_dev(part)->parent);
+		else
+			return dev_to_disk(part_to_dev(part));
 	}
 	return NULL;
 }
 
+static inline int disk_max_parts(struct gendisk *disk)
+{
+	if (disk->flags & GENHD_FL_EXT_DEVT)
+		return DISK_MAX_PARTS;
+	return disk->minors;
+}
+
+static inline bool disk_partitionable(struct gendisk *disk)
+{
+	return disk_max_parts(disk) > 1;
+}
+
+static inline dev_t disk_devt(struct gendisk *disk)
+{
+	return disk_to_dev(disk)->devt;
+}
+
+static inline dev_t part_devt(struct hd_struct *part)
+{
+	return part_to_dev(part)->devt;
+}
+
+extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno);
+
+static inline void disk_put_part(struct hd_struct *part)
+{
+	if (likely(part))
+		put_device(part_to_dev(part));
+}
+
+/*
+ * Smarter partition iterator without context limits.
+ */
+#define DISK_PITER_REVERSE	(1 << 0) /* iterate in the reverse direction */
+#define DISK_PITER_INCL_EMPTY	(1 << 1) /* include 0-sized parts */
+#define DISK_PITER_INCL_PART0	(1 << 2) /* include partition 0 */
+
+struct disk_part_iter {
+	struct gendisk		*disk;
+	struct hd_struct	*part;
+	int			idx;
+	unsigned int		flags;
+};
+
+extern void disk_part_iter_init(struct disk_part_iter *piter,
+				 struct gendisk *disk, unsigned int flags);
+extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter);
+extern void disk_part_iter_exit(struct disk_part_iter *piter);
+
+extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
+					     sector_t sector);
+
+/*
+ * Macros to operate on percpu disk statistics:
+ *
+ * part_stat_{add|sub|inc|dec}() modify the stat counters and
+ * should be called between part_stat_lock() and
+ * part_stat_unlock().
+ *
+ * part_stat_read() can be called at any time.
+ *
+ * __part_stat_add(), part_stat_set_all() and {init|free}_part_stats()
+ * are for internal use only.
+ */
 #ifdef	CONFIG_SMP
-#define __disk_stat_add(gendiskp, field, addnd) 	\
-	(per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd)
+#define part_stat_lock()	({ rcu_read_lock(); get_cpu(); })
+#define part_stat_unlock()	do { put_cpu(); rcu_read_unlock(); } while (0)
 
-#define disk_stat_read(gendiskp, field)					\
-({									\
-	typeof(gendiskp->dkstats->field) res = 0;			\
-	int i;								\
-	for_each_possible_cpu(i)					\
-		res += per_cpu_ptr(gendiskp->dkstats, i)->field;	\
-	res;								\
-})
-
-static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
-	int i;
-
-	for_each_possible_cpu(i)
-		memset(per_cpu_ptr(gendiskp->dkstats, i), value,
-				sizeof(struct disk_stats));
-}		
-
-#define __part_stat_add(part, field, addnd)				\
-	(per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
-
-#define __all_stat_add(gendiskp, part, field, addnd, sector)	\
-({								\
-	if (part)						\
-		__part_stat_add(part, field, addnd);		\
-	__disk_stat_add(gendiskp, field, addnd);		\
-})
+#define __part_stat_add(cpu, part, field, addnd)			\
+	(per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd))
 
 #define part_stat_read(part, field)					\
 ({									\
-	typeof(part->dkstats->field) res = 0;				\
+	typeof((part)->dkstats->field) res = 0;				\
 	int i;								\
 	for_each_possible_cpu(i)					\
-		res += per_cpu_ptr(part->dkstats, i)->field;		\
+		res += per_cpu_ptr((part)->dkstats, i)->field;		\
 	res;								\
 })
 
@@ -213,108 +268,6 @@
 		memset(per_cpu_ptr(part->dkstats, i), value,
 				sizeof(struct disk_stats));
 }
-				
-#else /* !CONFIG_SMP */
-#define __disk_stat_add(gendiskp, field, addnd) \
-				(gendiskp->dkstats.field += addnd)
-#define disk_stat_read(gendiskp, field)	(gendiskp->dkstats.field)
-
-static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
-{
-	memset(&gendiskp->dkstats, value, sizeof (struct disk_stats));
-}
-
-#define __part_stat_add(part, field, addnd) \
-	(part->dkstats.field += addnd)
-
-#define __all_stat_add(gendiskp, part, field, addnd, sector)	\
-({								\
-	if (part)						\
-		part->dkstats.field += addnd;			\
-	__disk_stat_add(gendiskp, field, addnd);		\
-})
-
-#define part_stat_read(part, field)	(part->dkstats.field)
-
-static inline void part_stat_set_all(struct hd_struct *part, int value)
-{
-	memset(&part->dkstats, value, sizeof(struct disk_stats));
-}
-
-#endif /* CONFIG_SMP */
-
-#define disk_stat_add(gendiskp, field, addnd)			\
-	do {							\
-		preempt_disable();				\
-		__disk_stat_add(gendiskp, field, addnd);	\
-		preempt_enable();				\
-	} while (0)
-
-#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1)
-#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1)
-
-#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1)
-#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1)
-
-#define __disk_stat_sub(gendiskp, field, subnd) \
-		__disk_stat_add(gendiskp, field, -subnd)
-#define disk_stat_sub(gendiskp, field, subnd) \
-		disk_stat_add(gendiskp, field, -subnd)
-
-#define part_stat_add(gendiskp, field, addnd)		\
-	do {						\
-		preempt_disable();			\
-		__part_stat_add(gendiskp, field, addnd);\
-		preempt_enable();			\
-	} while (0)
-
-#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1)
-#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1)
-
-#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1)
-#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1)
-
-#define __part_stat_sub(gendiskp, field, subnd) \
-		__part_stat_add(gendiskp, field, -subnd)
-#define part_stat_sub(gendiskp, field, subnd) \
-		part_stat_add(gendiskp, field, -subnd)
-
-#define all_stat_add(gendiskp, part, field, addnd, sector)	\
-	do {							\
-		preempt_disable();				\
-		__all_stat_add(gendiskp, part, field, addnd, sector);	\
-		preempt_enable();				\
-	} while (0)
-
-#define __all_stat_dec(gendiskp, field, sector) \
-		__all_stat_add(gendiskp, field, -1, sector)
-#define all_stat_dec(gendiskp, field, sector) \
-		all_stat_add(gendiskp, field, -1, sector)
-
-#define __all_stat_inc(gendiskp, part, field, sector) \
-		__all_stat_add(gendiskp, part, field, 1, sector)
-#define all_stat_inc(gendiskp, part, field, sector) \
-		all_stat_add(gendiskp, part, field, 1, sector)
-
-#define __all_stat_sub(gendiskp, part, field, subnd, sector) \
-		__all_stat_add(gendiskp, part, field, -subnd, sector)
-#define all_stat_sub(gendiskp, part, field, subnd, sector) \
-		all_stat_add(gendiskp, part, field, -subnd, sector)
-
-/* Inlines to alloc and free disk stats in struct gendisk */
-#ifdef  CONFIG_SMP
-static inline int init_disk_stats(struct gendisk *disk)
-{
-	disk->dkstats = alloc_percpu(struct disk_stats);
-	if (!disk->dkstats)
-		return 0;
-	return 1;
-}
-
-static inline void free_disk_stats(struct gendisk *disk)
-{
-	free_percpu(disk->dkstats);
-}
 
 static inline int init_part_stats(struct hd_struct *part)
 {
@@ -329,14 +282,18 @@
 	free_percpu(part->dkstats);
 }
 
-#else	/* CONFIG_SMP */
-static inline int init_disk_stats(struct gendisk *disk)
-{
-	return 1;
-}
+#else /* !CONFIG_SMP */
+#define part_stat_lock()	({ rcu_read_lock(); 0; })
+#define part_stat_unlock()	rcu_read_unlock()
 
-static inline void free_disk_stats(struct gendisk *disk)
+#define __part_stat_add(cpu, part, field, addnd)				\
+	((part)->dkstats.field += addnd)
+
+#define part_stat_read(part, field)	((part)->dkstats.field)
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
 {
+	memset(&part->dkstats, value, sizeof(struct disk_stats));
 }
 
 static inline int init_part_stats(struct hd_struct *part)
@@ -347,37 +304,71 @@
 static inline void free_part_stats(struct hd_struct *part)
 {
 }
-#endif	/* CONFIG_SMP */
+
+#endif /* CONFIG_SMP */
+
+#define part_stat_add(cpu, part, field, addnd)	do {			\
+	__part_stat_add((cpu), (part), field, addnd);			\
+	if ((part)->partno)						\
+		__part_stat_add((cpu), &part_to_disk((part))->part0,	\
+				field, addnd);				\
+} while (0)
+
+#define part_stat_dec(cpu, gendiskp, field)				\
+	part_stat_add(cpu, gendiskp, field, -1)
+#define part_stat_inc(cpu, gendiskp, field)				\
+	part_stat_add(cpu, gendiskp, field, 1)
+#define part_stat_sub(cpu, gendiskp, field, subnd)			\
+	part_stat_add(cpu, gendiskp, field, -subnd)
+
+static inline void part_inc_in_flight(struct hd_struct *part)
+{
+	part->in_flight++;
+	if (part->partno)
+		part_to_disk(part)->part0.in_flight++;
+}
+
+static inline void part_dec_in_flight(struct hd_struct *part)
+{
+	part->in_flight--;
+	if (part->partno)
+		part_to_disk(part)->part0.in_flight--;
+}
 
 /* drivers/block/ll_rw_blk.c */
-extern void disk_round_stats(struct gendisk *disk);
-extern void part_round_stats(struct hd_struct *part);
+extern void part_round_stats(int cpu, struct hd_struct *part);
 
 /* drivers/block/genhd.c */
 extern int get_blkdev_list(char *, int);
 extern void add_disk(struct gendisk *disk);
 extern void del_gendisk(struct gendisk *gp);
 extern void unlink_gendisk(struct gendisk *gp);
-extern struct gendisk *get_gendisk(dev_t dev, int *part);
+extern struct gendisk *get_gendisk(dev_t dev, int *partno);
+extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
 
 extern void set_device_ro(struct block_device *bdev, int flag);
 extern void set_disk_ro(struct gendisk *disk, int flag);
 
+static inline int get_disk_ro(struct gendisk *disk)
+{
+	return disk->part0.policy;
+}
+
 /* drivers/char/random.c */
 extern void add_disk_randomness(struct gendisk *disk);
 extern void rand_initialize_disk(struct gendisk *disk);
 
 static inline sector_t get_start_sect(struct block_device *bdev)
 {
-	return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect;
+	return bdev->bd_part->start_sect;
 }
 static inline sector_t get_capacity(struct gendisk *disk)
 {
-	return disk->capacity;
+	return disk->part0.nr_sects;
 }
 static inline void set_capacity(struct gendisk *disk, sector_t size)
 {
-	disk->capacity = size;
+	disk->part0.nr_sects = size;
 }
 
 #ifdef CONFIG_SOLARIS_X86_PARTITION
@@ -527,9 +518,12 @@
 #define ADDPART_FLAG_RAID	1
 #define ADDPART_FLAG_WHOLEDISK	2
 
-extern dev_t blk_lookup_devt(const char *name, int part);
-extern char *disk_name (struct gendisk *hd, int part, char *buf);
+extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
+extern void blk_free_devt(dev_t devt);
+extern dev_t blk_lookup_devt(const char *name, int partno);
+extern char *disk_name (struct gendisk *hd, int partno, char *buf);
 
+extern int disk_expand_part_tbl(struct gendisk *disk, int target);
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
 extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
 extern void delete_partition(struct gendisk *, int);
@@ -546,16 +540,23 @@
 			void *data);
 extern void blk_unregister_region(dev_t devt, unsigned long range);
 
-static inline struct block_device *bdget_disk(struct gendisk *disk, int index)
-{
-	return bdget(MKDEV(disk->major, disk->first_minor) + index);
-}
+extern ssize_t part_size_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
+extern ssize_t part_stat_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+extern ssize_t part_fail_show(struct device *dev,
+			      struct device_attribute *attr, char *buf);
+extern ssize_t part_fail_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count);
+#endif /* CONFIG_FAIL_MAKE_REQUEST */
 
 #else /* CONFIG_BLOCK */
 
 static inline void printk_all_partitions(void) { }
 
-static inline dev_t blk_lookup_devt(const char *name, int part)
+static inline dev_t blk_lookup_devt(const char *name, int partno)
 {
 	dev_t devt = MKDEV(0, 0);
 	return devt;

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1524829..6514db8 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h

@@ -366,7 +366,9 @@
 	/* Currently on a filemark */
 	IDE_AFLAG_FILEMARK		= (1 << 25),
 	/* 0 = no tape is loaded, so we don't rewind after ejecting */
-	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 26)
+	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 26),
+
+	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 27),
 };
 
 struct ide_drive_s {

diff --git a/include/linux/klist.h b/include/linux/klist.h
index 06c338e..8ea98db 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h

@@ -38,7 +38,7 @@
 		       void (*put)(struct klist_node *));
 
 struct klist_node {
-	struct klist		*n_klist;
+	void			*n_klist;	/* never access directly */
 	struct list_head	n_node;
 	struct kref		n_ref;
 	struct completion	n_removed;
@@ -57,7 +57,6 @@
 
 struct klist_iter {
 	struct klist		*i_klist;
-	struct list_head	*i_head;
 	struct klist_node	*i_cur;
 };
 

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 225bfc5..947cf84 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h

@@ -146,6 +146,7 @@
 	ATA_DFLAG_SPUNDOWN	= (1 << 14), /* XXX: for spindown_compat */
 	ATA_DFLAG_SLEEPING	= (1 << 15), /* device is sleeping */
 	ATA_DFLAG_DUBIOUS_XFER	= (1 << 16), /* data transfer not verified */
+	ATA_DFLAG_NO_UNLOAD	= (1 << 17), /* device doesn't support unload */
 	ATA_DFLAG_INIT_MASK	= (1 << 24) - 1,
 
 	ATA_DFLAG_DETACH	= (1 << 24),
@@ -244,6 +245,7 @@
 	ATA_TMOUT_BOOT		= 30000,	/* heuristic */
 	ATA_TMOUT_BOOT_QUICK	=  7000,	/* heuristic */
 	ATA_TMOUT_INTERNAL_QUICK = 5000,
+	ATA_TMOUT_MAX_PARK	= 30000,
 
 	/* FIXME: GoVault needs 2s but we can't afford that without
 	 * parallel probing.  800ms is enough for iVDR disk
@@ -319,8 +321,11 @@
 	ATA_EH_RESET		= ATA_EH_SOFTRESET | ATA_EH_HARDRESET,
 	ATA_EH_ENABLE_LINK	= (1 << 3),
 	ATA_EH_LPM		= (1 << 4),  /* link power management action */
+	ATA_EH_PARK		= (1 << 5), /* unload heads and stop I/O */
 
-	ATA_EH_PERDEV_MASK	= ATA_EH_REVALIDATE,
+	ATA_EH_PERDEV_MASK	= ATA_EH_REVALIDATE | ATA_EH_PARK,
+	ATA_EH_ALL_ACTIONS	= ATA_EH_REVALIDATE | ATA_EH_RESET |
+				  ATA_EH_ENABLE_LINK | ATA_EH_LPM,
 
 	/* ata_eh_info->flags */
 	ATA_EHI_HOTPLUGGED	= (1 << 0),  /* could have been hotplugged */
@@ -452,6 +457,7 @@
 	MEDIUM_POWER,
 };
 extern struct device_attribute dev_attr_link_power_management_policy;
+extern struct device_attribute dev_attr_unload_heads;
 extern struct device_attribute dev_attr_em_message_type;
 extern struct device_attribute dev_attr_em_message;
 extern struct device_attribute dev_attr_sw_activity;
@@ -554,8 +560,8 @@
 struct ata_device {
 	struct ata_link		*link;
 	unsigned int		devno;		/* 0 or 1 */
-	unsigned long		flags;		/* ATA_DFLAG_xxx */
 	unsigned int		horkage;	/* List of broken features */
+	unsigned long		flags;		/* ATA_DFLAG_xxx */
 	struct scsi_device	*sdev;		/* attached SCSI device */
 #ifdef CONFIG_ATA_ACPI
 	acpi_handle		acpi_handle;
@@ -564,6 +570,7 @@
 	/* n_sector is used as CLEAR_OFFSET, read comment above CLEAR_OFFSET */
 	u64			n_sectors;	/* size of device, if ATA */
 	unsigned int		class;		/* ATA_DEV_xxx */
+	unsigned long		unpark_deadline;
 
 	u8			pio_mode;
 	u8			dma_mode;
@@ -621,6 +628,7 @@
 					       [ATA_EH_CMD_TIMEOUT_TABLE_SIZE];
 	unsigned int		classes[ATA_MAX_DEVICES];
 	unsigned int		did_probe_mask;
+	unsigned int		unloaded_mask;
 	unsigned int		saved_ncq_enabled;
 	u8			saved_xfer_mode[ATA_MAX_DEVICES];
 	/* timestamp for the last reset attempt or success */
@@ -688,7 +696,8 @@
 	unsigned int		qc_active;
 	int			nr_active_links; /* #links with active qcs */
 
-	struct ata_link		link;	/* host default link */
+	struct ata_link		link;		/* host default link */
+	struct ata_link		*slave_link;	/* see ata_slave_link_init() */
 
 	int			nr_pmp_links;	/* nr of available PMP links */
 	struct ata_link		*pmp_link;	/* array of PMP links */
@@ -709,6 +718,7 @@
 	struct list_head	eh_done_q;
 	wait_queue_head_t	eh_wait_q;
 	int			eh_tries;
+	struct completion	park_req_pending;
 
 	pm_message_t		pm_mesg;
 	int			*pm_result;
@@ -772,8 +782,8 @@
 	/*
 	 * Optional features
 	 */
-	int  (*scr_read)(struct ata_port *ap, unsigned int sc_reg, u32 *val);
-	int  (*scr_write)(struct ata_port *ap, unsigned int sc_reg, u32 val);
+	int  (*scr_read)(struct ata_link *link, unsigned int sc_reg, u32 *val);
+	int  (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val);
 	void (*pmp_attach)(struct ata_port *ap);
 	void (*pmp_detach)(struct ata_port *ap);
 	int  (*enable_pm)(struct ata_port *ap, enum link_pm policy);
@@ -895,6 +905,7 @@
 extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports);
 extern struct ata_host *ata_host_alloc_pinfo(struct device *dev,
 			const struct ata_port_info * const * ppi, int n_ports);
+extern int ata_slave_link_init(struct ata_port *ap);
 extern int ata_host_start(struct ata_host *host);
 extern int ata_host_register(struct ata_host *host,
 			     struct scsi_host_template *sht);
@@ -920,8 +931,8 @@
 extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
 extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
 extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
-extern int ata_link_online(struct ata_link *link);
-extern int ata_link_offline(struct ata_link *link);
+extern bool ata_link_online(struct ata_link *link);
+extern bool ata_link_offline(struct ata_link *link);
 #ifdef CONFIG_PM
 extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg);
 extern void ata_host_resume(struct ata_host *host);
@@ -1098,6 +1109,7 @@
  */
 extern const struct ata_port_operations ata_base_port_ops;
 extern const struct ata_port_operations sata_port_ops;
+extern struct device_attribute *ata_common_sdev_attrs[];
 
 #define ATA_BASE_SHT(drv_name)					\
 	.module			= THIS_MODULE,			\
@@ -1112,7 +1124,8 @@
 	.proc_name		= drv_name,			\
 	.slave_configure	= ata_scsi_slave_config,	\
 	.slave_destroy		= ata_scsi_slave_destroy,	\
-	.bios_param		= ata_std_bios_param
+	.bios_param		= ata_std_bios_param,		\
+	.sdev_attrs		= ata_common_sdev_attrs
 
 #define ATA_NCQ_SHT(drv_name)					\
 	ATA_BASE_SHT(drv_name),					\
@@ -1134,7 +1147,7 @@
 
 static inline int ata_is_host_link(const struct ata_link *link)
 {
-	return link == &link->ap->link;
+	return link == &link->ap->link || link == link->ap->slave_link;
 }
 #else /* CONFIG_SATA_PMP */
 static inline bool sata_pmp_supported(struct ata_port *ap)
@@ -1167,7 +1180,7 @@
 	printk("%sata%u: "fmt, lv, (ap)->print_id , ##args)
 
 #define ata_link_printk(link, lv, fmt, args...) do { \
-	if (sata_pmp_attached((link)->ap)) \
+	if (sata_pmp_attached((link)->ap) || (link)->ap->slave_link)	\
 		printk("%sata%u.%02u: "fmt, lv, (link)->ap->print_id,	\
 		       (link)->pmp , ##args); \
 	else \
@@ -1265,34 +1278,17 @@
 	return ata_tag_valid(link->active_tag) || link->sactive;
 }
 
-static inline struct ata_link *ata_port_first_link(struct ata_port *ap)
-{
-	if (sata_pmp_attached(ap))
-		return ap->pmp_link;
-	return &ap->link;
-}
+extern struct ata_link *__ata_port_next_link(struct ata_port *ap,
+					     struct ata_link *link,
+					     bool dev_only);
 
-static inline struct ata_link *ata_port_next_link(struct ata_link *link)
-{
-	struct ata_port *ap = link->ap;
-
-	if (ata_is_host_link(link)) {
-		if (!sata_pmp_attached(ap))
-			return NULL;
-		return ap->pmp_link;
-	}
-
-	if (++link < ap->nr_pmp_links + ap->pmp_link)
-		return link;
-	return NULL;
-}
-
-#define __ata_port_for_each_link(lk, ap) \
-	for ((lk) = &(ap)->link; (lk); (lk) = ata_port_next_link(lk))
+#define __ata_port_for_each_link(link, ap) \
+	for ((link) = __ata_port_next_link((ap), NULL, false); (link); \
+	     (link) = __ata_port_next_link((ap), (link), false))
 
 #define ata_port_for_each_link(link, ap) \
-	for ((link) = ata_port_first_link(ap); (link); \
-	     (link) = ata_port_next_link(link))
+	for ((link) = __ata_port_next_link((ap), NULL, true); (link); \
+	     (link) = __ata_port_next_link((ap), (link), true))
 
 #define ata_link_for_each_dev(dev, link) \
 	for ((dev) = (link)->device; \

diff --git a/include/linux/major.h b/include/linux/major.h
index 53d5faf..8824945 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h

@@ -170,4 +170,6 @@
 
 #define VIOTAPE_MAJOR		230
 
+#define BLOCK_EXT_MAJOR		259
+
 #endif

diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index 310e616..8b4aa05 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h

@@ -41,6 +41,8 @@
 		    unsigned long block, char *buffer);
 	int (*writesect)(struct mtd_blktrans_dev *dev,
 		     unsigned long block, char *buffer);
+	int (*discard)(struct mtd_blktrans_dev *dev,
+		       unsigned long block, unsigned nr_blocks);
 
 	/* Block layer ioctls */
 	int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 5da9794..b106fd8 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h

@@ -1,6 +1,8 @@
 #ifndef __LINUX_STACKTRACE_H
 #define __LINUX_STACKTRACE_H
 
+struct task_struct;
+
 #ifdef CONFIG_STACKTRACE
 struct stack_trace {
 	unsigned int nr_entries, max_entries;

diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index f9f6e79..855bf95 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h

@@ -75,7 +75,6 @@
 
 	int retries;
 	int allowed;
-	int timeout_per_command;
 
 	unsigned char prot_op;
 	unsigned char prot_type;
@@ -86,7 +85,6 @@
 	/* These elements define the operation we are about to perform */
 	unsigned char *cmnd;
 
-	struct timer_list eh_timeout;	/* Used to time out the command. */
 
 	/* These elements define the operation we ultimately want to perform */
 	struct scsi_data_buffer sdb;
@@ -139,7 +137,6 @@
 extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *,
 			       struct device *);
 extern void scsi_finish_command(struct scsi_cmnd *cmd);
-extern void scsi_req_abort_cmd(struct scsi_cmnd *cmd);
 
 extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 				 size_t *offset, size_t *len);

diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 44a55d1..d123ca8 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h

@@ -43,13 +43,6 @@
 #define DISABLE_CLUSTERING 0
 #define ENABLE_CLUSTERING 1
 
-enum scsi_eh_timer_return {
-	EH_NOT_HANDLED,
-	EH_HANDLED,
-	EH_RESET_TIMER,
-};
-
-
 struct scsi_host_template {
 	struct module *module;
 	const char *name;
@@ -347,7 +340,7 @@
 	 *
 	 * Status: OPTIONAL
 	 */
-	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
 
 	/*
 	 * Name of proc directory

diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h
index 490bd13..0de32cd 100644
--- a/include/scsi/scsi_transport.h
+++ b/include/scsi/scsi_transport.h

@@ -21,6 +21,7 @@
 #define SCSI_TRANSPORT_H
 
 #include <linux/transport_class.h>
+#include <linux/blkdev.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 
@@ -64,7 +65,7 @@
 	 *			begin counting again
 	 * EH_NOT_HANDLED	Begin normal error recovery
 	 */
-	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
 
 	/*
 	 * Used as callback for the completion of i_t_nexus request

diff --git a/init/do_mounts.c b/init/do_mounts.c
index 3715feb..d055b19 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c

@@ -263,6 +263,10 @@
 		printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
 
 		printk_all_partitions();
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+		printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
+		       "explicit textual name for \"root=\" boot option.\n");
+#endif
 		panic("VFS: Unable to mount root fs on %s", b);
 	}
 

diff --git a/init/main.c b/init/main.c
index f6f7042..3820323 100644
--- a/init/main.c
+++ b/init/main.c

@@ -708,7 +708,7 @@
 	int result;
 
 	if (initcall_debug) {
-		print_fn_descriptor_symbol("calling  %s\n", fn);
+		printk("calling  %pF\n", fn);
 		t0 = ktime_get();
 	}
 
@@ -718,8 +718,8 @@
 		t1 = ktime_get();
 		delta = ktime_sub(t1, t0);
 
-		print_fn_descriptor_symbol("initcall %s", fn);
-		printk(" returned %d after %Ld msecs\n", result,
+		printk("initcall %pF returned %d after %Ld msecs\n",
+			fn, result,
 			(unsigned long long) delta.tv64 >> 20);
 	}
 
@@ -737,8 +737,7 @@
 		local_irq_enable();
 	}
 	if (msgbuf[0]) {
-		print_fn_descriptor_symbol(KERN_WARNING "initcall %s", fn);
-		printk(" returned with %s\n", msgbuf);
+		printk("initcall %pF returned with %s\n", fn, msgbuf);
 	}
 
 	return result;

diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 25d955d..e4dcfb2 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c

@@ -590,6 +590,7 @@
 
 	/* Signal the primary CPU that we are done: */
 	atomic_set(&cpu_in_kgdb[cpu], 0);
+	touch_softlockup_watchdog();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 }
@@ -1432,6 +1433,7 @@
 	    atomic_read(&kgdb_cpu_doing_single_step) != cpu) {
 
 		atomic_set(&kgdb_active, -1);
+		touch_softlockup_watchdog();
 		clocksource_touch_watchdog();
 		local_irq_restore(flags);
 
@@ -1524,6 +1526,7 @@
 kgdb_restore:
 	/* Free kgdb_active */
 	atomic_set(&kgdb_active, -1);
+	touch_softlockup_watchdog();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index bd70345..cb01cd8 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c

@@ -235,7 +235,8 @@
 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
 		if (!cpu_isset(cpu, tick_broadcast_mask)) {
 			cpu_set(cpu, tick_broadcast_mask);
-			if (bc->mode == TICKDEV_MODE_PERIODIC)
+			if (tick_broadcast_device.mode ==
+			    TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
 		}
 		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
@@ -245,7 +246,8 @@
 		if (!tick_broadcast_force &&
 		    cpu_isset(cpu, tick_broadcast_mask)) {
 			cpu_clear(cpu, tick_broadcast_mask);
-			if (bc->mode == TICKDEV_MODE_PERIODIC)
+			if (tick_broadcast_device.mode ==
+			    TICKDEV_MODE_PERIODIC)
 				tick_setup_periodic(dev, 0);
 		}
 		break;

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b50481..7d7a31d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug

@@ -624,6 +624,28 @@
 
 	  Say N if you are unsure.
 
+config DEBUG_BLOCK_EXT_DEVT
+        bool "Force extended block device numbers and spread them"
+	depends on DEBUG_KERNEL
+	depends on BLOCK
+	default n
+	help
+	  Conventionally, block device numbers are allocated from a
+	  predetermined contiguous area.  However, the extended block area
+	  may introduce non-contiguous block device numbers.  This
+	  option forces most block device numbers to be allocated from
+	  the extended space and spreads them to discover kernel or
+	  userland code paths which assume predetermined contiguous
+	  device number allocation.
+
+	  Note that turning on this debug option shuffles all the
+	  device numbers for all IDE and SCSI devices including libata
+	  ones, so a root partition specified using a device number
+	  directly (via rdev or root=MAJ:MIN) won't work anymore.
+	  Textual device names (root=/dev/sdXn) will continue to work.
+
+	  Say N if you are unsure.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
 	depends on DEBUG_KERNEL
@@ -661,10 +683,21 @@
 
 config FAIL_MAKE_REQUEST
 	bool "Fault-injection capability for disk IO"
-	depends on FAULT_INJECTION
+	depends on FAULT_INJECTION && BLOCK
 	help
 	  Provide fault-injection capability for disk IO.
 
+config FAIL_IO_TIMEOUT
+	bool "Fault-injection capability for faking disk interrupts"
+	depends on FAULT_INJECTION && BLOCK
+	help
+	  Provide fault-injection capability on end IO handling. This
+	  will make the block layer "forget" an interrupt as configured,
+	  thus exercising the error handling.
+
+	  Only works with drivers that use the generic timeout handling;
+	  for others it won't do anything.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS

diff --git a/lib/klist.c b/lib/klist.c
index cca37f9..bbdd301 100644
--- a/lib/klist.c
+++ b/lib/klist.c

@@ -37,6 +37,37 @@
 #include <linux/klist.h>
 #include <linux/module.h>
 
+/*
+ * Use the lowest bit of n_klist to mark deleted nodes and exclude
+ * dead ones from iteration.
+ */
+#define KNODE_DEAD		1LU
+#define KNODE_KLIST_MASK	~KNODE_DEAD
+
+static struct klist *knode_klist(struct klist_node *knode)
+{
+	return (struct klist *)
+		((unsigned long)knode->n_klist & KNODE_KLIST_MASK);
+}
+
+static bool knode_dead(struct klist_node *knode)
+{
+	return (unsigned long)knode->n_klist & KNODE_DEAD;
+}
+
+static void knode_set_klist(struct klist_node *knode, struct klist *klist)
+{
+	knode->n_klist = klist;
+	/* no knode deserves to start its life dead */
+	WARN_ON(knode_dead(knode));
+}
+
+static void knode_kill(struct klist_node *knode)
+{
+	/* and no knode should die twice ever either, see we're very humane */
+	WARN_ON(knode_dead(knode));
+	*(unsigned long *)&knode->n_klist |= KNODE_DEAD;
+}
 
 /**
  * klist_init - Initialize a klist structure.
@@ -79,7 +110,7 @@
 	INIT_LIST_HEAD(&n->n_node);
 	init_completion(&n->n_removed);
 	kref_init(&n->n_ref);
-	n->n_klist = k;
+	knode_set_klist(n, k);
 	if (k->get)
 		k->get(n);
 }
@@ -115,7 +146,7 @@
  */
 void klist_add_after(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -131,7 +162,7 @@
  */
 void klist_add_before(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -144,9 +175,10 @@
 {
 	struct klist_node *n = container_of(kref, struct klist_node, n_ref);
 
+	WARN_ON(!knode_dead(n));
 	list_del(&n->n_node);
 	complete(&n->n_removed);
-	n->n_klist = NULL;
+	knode_set_klist(n, NULL);
 }
 
 static int klist_dec_and_del(struct klist_node *n)
@@ -154,21 +186,28 @@
 	return kref_put(&n->n_ref, klist_release);
 }
 
+static void klist_put(struct klist_node *n, bool kill)
+{
+	struct klist *k = knode_klist(n);
+	void (*put)(struct klist_node *) = k->put;
+
+	spin_lock(&k->k_lock);
+	if (kill)
+		knode_kill(n);
+	if (!klist_dec_and_del(n))
+		put = NULL;
+	spin_unlock(&k->k_lock);
+	if (put)
+		put(n);
+}
+
 /**
  * klist_del - Decrement the reference count of node and try to remove.
  * @n: node we're deleting.
  */
 void klist_del(struct klist_node *n)
 {
-	struct klist *k = n->n_klist;
-	void (*put)(struct klist_node *) = k->put;
-
-	spin_lock(&k->k_lock);
-	if (!klist_dec_and_del(n))
-		put = NULL;
-	spin_unlock(&k->k_lock);
-	if (put)
-		put(n);
+	klist_put(n, true);
 }
 EXPORT_SYMBOL_GPL(klist_del);
 
@@ -206,7 +245,6 @@
 			  struct klist_node *n)
 {
 	i->i_klist = k;
-	i->i_head = &k->k_list;
 	i->i_cur = n;
 	if (n)
 		kref_get(&n->n_ref);
@@ -237,7 +275,7 @@
 void klist_iter_exit(struct klist_iter *i)
 {
 	if (i->i_cur) {
-		klist_del(i->i_cur);
+		klist_put(i->i_cur, false);
 		i->i_cur = NULL;
 	}
 }
@@ -258,27 +296,33 @@
  */
 struct klist_node *klist_next(struct klist_iter *i)
 {
-	struct list_head *next;
-	struct klist_node *lnode = i->i_cur;
-	struct klist_node *knode = NULL;
 	void (*put)(struct klist_node *) = i->i_klist->put;
+	struct klist_node *last = i->i_cur;
+	struct klist_node *next;
 
 	spin_lock(&i->i_klist->k_lock);
-	if (lnode) {
-		next = lnode->n_node.next;
-		if (!klist_dec_and_del(lnode))
+
+	if (last) {
+		next = to_klist_node(last->n_node.next);
+		if (!klist_dec_and_del(last))
 			put = NULL;
 	} else
-		next = i->i_head->next;
+		next = to_klist_node(i->i_klist->k_list.next);
 
-	if (next != i->i_head) {
-		knode = to_klist_node(next);
-		kref_get(&knode->n_ref);
+	i->i_cur = NULL;
+	while (next != to_klist_node(&i->i_klist->k_list)) {
+		if (likely(!knode_dead(next))) {
+			kref_get(&next->n_ref);
+			i->i_cur = next;
+			break;
+		}
+		next = to_klist_node(next->n_node.next);
 	}
-	i->i_cur = knode;
+
 	spin_unlock(&i->i_klist->k_lock);
-	if (put && lnode)
-		put(lnode);
-	return knode;
+
+	if (put && last)
+		put(last);
+	return i->i_cur;
 }
 EXPORT_SYMBOL_GPL(klist_next);

diff --git a/mm/bounce.c b/mm/bounce.c
index b6d2d0f..06722c4 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c

@@ -267,7 +267,7 @@
 	/*
 	 * Data-less bio, nothing to bounce
 	 */
-	if (bio_empty_barrier(*bio_orig))
+	if (!bio_has_data(*bio_orig))
 		return;
 
 	/*

diff --git a/mm/slob.c b/mm/slob.c
index 4c82dd4..cb675d1 100644
--- a/mm/slob.c
+++ b/mm/slob.c

@@ -514,9 +514,11 @@
 		return 0;
 
 	sp = (struct slob_page *)virt_to_page(block);
-	if (slob_page(sp))
-		return ((slob_t *)block - 1)->units + SLOB_UNIT;
-	else
+	if (slob_page(sp)) {
+		int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+		unsigned int *m = (unsigned int *)(block - align);
+		return SLOB_UNITS(*m) * SLOB_UNIT;
+	} else
 		return sp->page.private;
 }
 

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 01c83e2..28c7157 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c

@@ -317,6 +317,9 @@
 				/* Queue the unaccepted socket for death */
 				sock_orphan(skb->sk);
 
+				/* 9A4GL: hack to release unaccepted sockets */
+				skb->sk->sk_state = TCP_LISTEN;
+
 				ax25_start_heartbeat(sax25);
 				sax25->state = AX25_STATE_0;
 			}

diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index cdc7e75..96e4b92 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c

@@ -39,9 +39,11 @@
 
 	switch (ax25->state) {
 	case AX25_STATE_0:
-		if (!sk ||
-		    sock_flag(sk, SOCK_DESTROY) ||
-		    sock_flag(sk, SOCK_DEAD)) {
+		/* Magic here: If we listen() and a new link dies before it
+		   is accepted() it isn't 'dead' so doesn't get removed. */
+		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
+		    (sk->sk_state == TCP_LISTEN &&
+		     sock_flag(sk, SOCK_DEAD))) {
 			if (sk) {
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);

diff --git a/net/core/dev.c b/net/core/dev.c
index e8eb2b4..0ae08d3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c

@@ -2918,6 +2918,12 @@
 	return 0;
 }
 
+static void dev_change_rx_flags(struct net_device *dev, int flags)
+{
+	if (dev->flags & IFF_UP && dev->change_rx_flags)
+		dev->change_rx_flags(dev, flags);
+}
+
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
@@ -2955,8 +2961,7 @@
 				current->uid, current->gid,
 				audit_get_sessionid(current));
 
-		if (dev->change_rx_flags)
-			dev->change_rx_flags(dev, IFF_PROMISC);
+		dev_change_rx_flags(dev, IFF_PROMISC);
 	}
 	return 0;
 }
@@ -3022,8 +3027,7 @@
 		}
 	}
 	if (dev->flags ^ old_flags) {
-		if (dev->change_rx_flags)
-			dev->change_rx_flags(dev, IFF_ALLMULTI);
+		dev_change_rx_flags(dev, IFF_ALLMULTI);
 		dev_set_rx_mode(dev);
 	}
 	return 0;
@@ -3347,8 +3351,8 @@
 	 *	Load in the correct multicast list now the flags have changed.
 	 */
 
-	if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
-		dev->change_rx_flags(dev, IFF_MULTICAST);
+	if ((old_flags ^ flags) & IFF_MULTICAST)
+		dev_change_rx_flags(dev, IFF_MULTICAST);
 
 	dev_set_rx_mode(dev);
 
@@ -3808,14 +3812,11 @@
 }
 
 /* Delayed registration/unregisteration */
-static DEFINE_SPINLOCK(net_todo_list_lock);
 static LIST_HEAD(net_todo_list);
 
 static void net_set_todo(struct net_device *dev)
 {
-	spin_lock(&net_todo_list_lock);
 	list_add_tail(&dev->todo_list, &net_todo_list);
-	spin_unlock(&net_todo_list_lock);
 }
 
 static void rollback_registered(struct net_device *dev)
@@ -4142,33 +4143,24 @@
  *	free_netdev(y1);
  *	free_netdev(y2);
  *
- * We are invoked by rtnl_unlock() after it drops the semaphore.
+ * We are invoked by rtnl_unlock().
  * This allows us to deal with problems:
  * 1) We can delete sysfs objects which invoke hotplug
  *    without deadlocking with linkwatch via keventd.
  * 2) Since we run with the RTNL semaphore not held, we can sleep
  *    safely in order to wait for the netdev refcnt to drop to zero.
+ *
+ * We must not return until all unregister events added during
+ * the interval the lock was held have been completed.
  */
-static DEFINE_MUTEX(net_todo_run_mutex);
 void netdev_run_todo(void)
 {
 	struct list_head list;
 
-	/* Need to guard against multiple cpu's getting out of order. */
-	mutex_lock(&net_todo_run_mutex);
-
-	/* Not safe to do outside the semaphore.  We must not return
-	 * until all unregister events invoked by the local processor
-	 * have been completed (either by this todo run, or one on
-	 * another cpu).
-	 */
-	if (list_empty(&net_todo_list))
-		goto out;
-
 	/* Snapshot list, allow later requests */
-	spin_lock(&net_todo_list_lock);
 	list_replace_init(&net_todo_list, &list);
-	spin_unlock(&net_todo_list_lock);
+
+	__rtnl_unlock();
 
 	while (!list_empty(&list)) {
 		struct net_device *dev
@@ -4200,9 +4192,6 @@
 		/* Free network device */
 		kobject_put(&dev->dev.kobj);
 	}
-
-out:
-	mutex_unlock(&net_todo_run_mutex);
 }
 
 static struct net_device_stats *internal_stats(struct net_device *dev)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 71edb8b..d6381c2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c

@@ -73,7 +73,7 @@
 
 void rtnl_unlock(void)
 {
-	mutex_unlock(&rtnl_mutex);
+	/* This fellow will unlock it for us. */
 	netdev_run_todo();
 }
 

diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index bfcbd14..c209e05 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c

@@ -150,7 +150,11 @@
 		ca->snd_cwnd_cents -= 128;
 		tp->snd_cwnd_cnt = 0;
 	}
-
+	/* check when cwnd has not been incremented for a while */
+	if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+		tp->snd_cwnd++;
+		tp->snd_cwnd_cnt = 0;
+	}
 	/* clamp down slowstart cwnd to ssthresh value. */
 	if (is_slowstart)
 		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 67ccce2..7abc6b8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c

@@ -4879,7 +4879,8 @@
 					goto no_ack;
 			}
 
-			__tcp_ack_snd_check(sk, 0);
+			if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
+				__tcp_ack_snd_check(sk, 0);
 no_ack:
 #ifdef CONFIG_NET_DMA
 			if (copied_early)

diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 532e4fa..9f1ea4a 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c

@@ -525,6 +525,7 @@
 	if (sk == NULL) return 0;
 
 	sock_hold(sk);
+	sock_orphan(sk);
 	lock_sock(sk);
 	nr = nr_sk(sk);
 
@@ -548,7 +549,6 @@
 		sk->sk_state    = TCP_CLOSE;
 		sk->sk_shutdown |= SEND_SHUTDOWN;
 		sk->sk_state_change(sk);
-		sock_orphan(sk);
 		sock_set_flag(sk, SOCK_DESTROY);
 		break;
 

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 418cd7d..8e0de6a 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c

@@ -1986,11 +1986,13 @@
 
 		mod = find_module(modname);
 		if (!mod) {
-			if (is_vmlinux(modname))
-				have_vmlinux = 1;
 			mod = new_module(NOFAIL(strdup(modname)));
 			mod->skip = 1;
 		}
+		if (is_vmlinux(modname)) {
+			have_vmlinux = 1;
+			mod->skip = 0;
+		}
 
 		if (!mod->skip)
 			add_marker(mod, marker, fmt);

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index d11a815..8551952 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c

@@ -2737,6 +2737,7 @@
 		if (ctx == NULL)
 			goto netlbl_secattr_to_sid_return;
 
+		context_init(&ctx_new);
 		ctx_new.user = ctx->user;
 		ctx_new.role = ctx->role;
 		ctx_new.type = ctx->type;
@@ -2745,13 +2746,9 @@
 			if (ebitmap_netlbl_import(&ctx_new.range.level[0].cat,
 						  secattr->attr.mls.cat) != 0)
 				goto netlbl_secattr_to_sid_return;
-			ctx_new.range.level[1].cat.highbit =
-				ctx_new.range.level[0].cat.highbit;
-			ctx_new.range.level[1].cat.node =
-				ctx_new.range.level[0].cat.node;
-		} else {
-			ebitmap_init(&ctx_new.range.level[0].cat);
-			ebitmap_init(&ctx_new.range.level[1].cat);
+			memcpy(&ctx_new.range.level[1].cat,
+			       &ctx_new.range.level[0].cat,
+			       sizeof(ctx_new.range.level[0].cat));
 		}
 		if (mls_context_isvalid(&policydb, &ctx_new) != 1)
 			goto netlbl_secattr_to_sid_return_cleanup;
commit	ef5bef357cdf49f3a386c7102dbf3be5f7e5c913	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Fri Oct 10 10:53:26 2008 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Fri Oct 10 10:53:26 2008 -0700
tree	48d9dc86768e3e146267ea21d0c898f9008275a1
parent	e26feff647ef34423b048b940540a0059001ddb0 [diff]
parent	41bfcf90101601f9507240ff0435c1b73d28a132 [diff]