Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6:
  dsa: fix master interface allmulti/promisc handling
  dsa: fix skb->pkt_type when mac address of slave interface differs
  net: fix setting of skb->tail in skb_recycle_check()
  net: fix /proc/net/snmp as memory corruptor
  mac80211: fix a buffer overrun in station debug code
  netfilter: payload_len is be16, add size of struct rather than size of pointer
  ipv6: fix ip6_mr_init error path
  [4/4] dca: fixup initialization dependency
  [3/4] I/OAT: fix async_tx.callback checking
  [2/4] I/OAT: fix dma_pin_iovec_pages() error handling
  [1/4] I/OAT: fix channel resources free for not allocated channels
  ssb: Fix DMA-API compilation for non-PCI systems
  SSB: hide empty sub menu
  vlan: Fix typos in proc output string
  [netdrvr] usb/hso: Cleanup rfkill error handling
  sfc: Correct address of gPXE boot configuration in EEPROM
  el3_common_init() should be __devinit, not __init
  hso: rfkill type should be WWAN
  mlx4_en: Start port error flow bug fix
  af_key: mark policy as dead before destroying
diff --git a/Makefile b/Makefile
index 29abe62..7f9ff9b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 28
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Killer Bat of Doom
 
 # *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index e6ab550..8977d99 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -21,7 +21,7 @@
           Instruction-Based Sampling (IBS) is a new profiling
           technique that provides rich, precise program performance
           information. IBS is introduced by AMD Family10h processors
-          (AMD Opteron Quad-Core processor “Barcelona”) to overcome
+          (AMD Opteron Quad-Core processor "Barcelona") to overcome
           the limitations of conventional performance counter
           sampling.
 
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 27eec714..6bd91ed 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -148,6 +148,7 @@
 	select ACPI_NUMA
 	select SWIOTLB
 	select PCI_MSI
+	select DMAR
 	help
 	  This selects the system type of your hardware.  A "generic" kernel
 	  will run on any supported IA-64 system.  However, if you configure
@@ -585,7 +586,7 @@
 
 endmenu
 
-menu "Power management and ACPI"
+menu "Power management and ACPI options"
 
 source "kernel/power/Kconfig"
 
@@ -641,6 +642,8 @@
 
 source "drivers/Kconfig"
 
+source "arch/ia64/hp/sim/Kconfig"
+
 config MSPEC
 	tristate "Memory special operations driver"
 	depends on IA64
@@ -652,6 +655,12 @@
 
 source "fs/Kconfig"
 
+source "arch/ia64/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
 source "arch/ia64/kvm/Kconfig"
 
 source "lib/Kconfig"
@@ -678,11 +687,3 @@
 
 config IOMMU_HELPER
 	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
-
-source "arch/ia64/hp/sim/Kconfig"
-
-source "arch/ia64/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 88b6e6f3..2769dbf 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -13,19 +13,12 @@
  */
 
 #include <linux/device.h>
+#include <linux/swiotlb.h>
 
 #include <asm/machvec.h>
 
 /* swiotlb declarations & definitions: */
 extern int swiotlb_late_init_with_default_size (size_t size);
-extern ia64_mv_dma_alloc_coherent	swiotlb_alloc_coherent;
-extern ia64_mv_dma_free_coherent	swiotlb_free_coherent;
-extern ia64_mv_dma_map_single_attrs	swiotlb_map_single_attrs;
-extern ia64_mv_dma_unmap_single_attrs	swiotlb_unmap_single_attrs;
-extern ia64_mv_dma_map_sg_attrs		swiotlb_map_sg_attrs;
-extern ia64_mv_dma_unmap_sg_attrs	swiotlb_unmap_sg_attrs;
-extern ia64_mv_dma_supported		swiotlb_dma_supported;
-extern ia64_mv_dma_mapping_error	swiotlb_dma_mapping_error;
 
 /* hwiommu declarations & definitions: */
 
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 7f25750..0d9d16e 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -434,28 +434,4 @@
 
 # endif /* __KERNEL__ */
 
-/*
- * Enabling BIO_VMERGE_BOUNDARY forces us to turn off I/O MMU bypassing.  It is said that
- * BIO-level virtual merging can give up to 4% performance boost (not verified for ia64).
- * On the other hand, we know that I/O MMU bypassing gives ~8% performance improvement on
- * SPECweb-like workloads on zx1-based machines.  Thus, for now we favor I/O MMU bypassing
- * over BIO-level virtual merging.
- */
-extern unsigned long ia64_max_iommu_merge_mask;
-#if 1
-#define BIO_VMERGE_BOUNDARY	0
-#else
-/*
- * It makes no sense at all to have this BIO_VMERGE_BOUNDARY macro here.  Should be
- * replaced by dma_merge_mask() or something of that sort.  Note: the only way
- * BIO_VMERGE_BOUNDARY is used is to mask off bits.  Effectively, our definition gets
- * expanded into:
- *
- *	addr & ((ia64_max_iommu_merge_mask + 1) - 1) == (addr & ia64_max_iommu_vmerge_mask)
- *
- * which is precisely what we want.
- */
-#define BIO_VMERGE_BOUNDARY	(ia64_max_iommu_merge_mask + 1)
-#endif
-
 #endif /* _ASM_IA64_IO_H */
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 1ea28bc..59c17e4 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -11,6 +11,7 @@
 #define _ASM_IA64_MACHVEC_H
 
 #include <linux/types.h>
+#include <linux/swiotlb.h>
 
 /* forward declarations: */
 struct device;
@@ -298,27 +299,6 @@
 # endif /* CONFIG_IA64_GENERIC */
 
 /*
- * Declare default routines which aren't declared anywhere else:
- */
-extern ia64_mv_dma_init			swiotlb_init;
-extern ia64_mv_dma_alloc_coherent	swiotlb_alloc_coherent;
-extern ia64_mv_dma_free_coherent	swiotlb_free_coherent;
-extern ia64_mv_dma_map_single		swiotlb_map_single;
-extern ia64_mv_dma_map_single_attrs	swiotlb_map_single_attrs;
-extern ia64_mv_dma_unmap_single		swiotlb_unmap_single;
-extern ia64_mv_dma_unmap_single_attrs	swiotlb_unmap_single_attrs;
-extern ia64_mv_dma_map_sg		swiotlb_map_sg;
-extern ia64_mv_dma_map_sg_attrs		swiotlb_map_sg_attrs;
-extern ia64_mv_dma_unmap_sg		swiotlb_unmap_sg;
-extern ia64_mv_dma_unmap_sg_attrs	swiotlb_unmap_sg_attrs;
-extern ia64_mv_dma_sync_single_for_cpu	swiotlb_sync_single_for_cpu;
-extern ia64_mv_dma_sync_sg_for_cpu	swiotlb_sync_sg_for_cpu;
-extern ia64_mv_dma_sync_single_for_device swiotlb_sync_single_for_device;
-extern ia64_mv_dma_sync_sg_for_device	swiotlb_sync_sg_for_device;
-extern ia64_mv_dma_mapping_error	swiotlb_dma_mapping_error;
-extern ia64_mv_dma_supported		swiotlb_dma_supported;
-
-/*
  * Define default versions so we can extend machvec for new platforms without having
  * to update the machvec files for all existing platforms.
  */
diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h
index 6bc96ee..c0cea37 100644
--- a/arch/ia64/include/asm/meminit.h
+++ b/arch/ia64/include/asm/meminit.h
@@ -48,7 +48,6 @@
  */
 #define GRANULEROUNDDOWN(n)	((n) & ~(IA64_GRANULE_SIZE-1))
 #define GRANULEROUNDUP(n)	(((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))
-#define ORDERROUNDDOWN(n)	((n) & ~((PAGE_SIZE<<MAX_ORDER)-1))
 
 #ifdef CONFIG_NUMA
   extern void call_pernode_memory (unsigned long start, unsigned long len, void *func);
diff --git a/arch/ia64/include/asm/sal.h b/arch/ia64/include/asm/sal.h
index ea310c0..966797a 100644
--- a/arch/ia64/include/asm/sal.h
+++ b/arch/ia64/include/asm/sal.h
@@ -337,11 +337,24 @@
 #define sal_log_severity_fatal		1
 #define sal_log_severity_corrected	2
 
+/*
+ * Error Recovery Info (ERI) bit decode.  From SAL Spec section B.2.2 Table B-3
+ * Error Section Error_Recovery_Info Field Definition.
+ */
+#define ERI_NOT_VALID		0x0	/* Error Recovery Field is not valid */
+#define ERI_NOT_ACCESSIBLE	0x30	/* Resource not accessible */
+#define ERI_CONTAINMENT_WARN	0x22	/* Corrupt data propagated */
+#define ERI_UNCORRECTED_ERROR	0x20	/* Uncorrected error */
+#define ERI_COMPONENT_RESET	0x24	/* Component must be reset */
+#define ERI_CORR_ERROR_LOG	0x21	/* Corrected error, needs logging */
+#define ERI_CORR_ERROR_THRESH	0x29	/* Corrected error threshold exceeded */
+
 /* Definition of log section header structures */
 typedef struct sal_log_sec_header {
     efi_guid_t guid;			/* Unique Section ID */
     sal_log_revision_t revision;	/* Major and Minor revision of Section */
-    u16 reserved;
+    u8 error_recovery_info;		/* Platform error recovery status */
+    u8 reserved;
     u32 len;				/* Section length */
 } sal_log_section_hdr_t;
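The hunk above splits the old 16-bit reserved field so each SAL error-record section now carries an Error Recovery Info byte, decoded against the ERI_* values from Table B-3 of the SAL spec. A minimal userspace sketch of the decode, assuming (as the table defines them) that the ERI_* constants are complete field values rather than bit masks:

#include <stdio.h>
#include <stdint.h>

#define ERI_NOT_VALID		0x0	/* Error Recovery Field is not valid */
#define ERI_CORR_ERROR_LOG	0x21	/* Corrected error, needs logging */
#define ERI_CORR_ERROR_THRESH	0x29	/* Corrected error threshold exceeded */

/* mirrors the new layout: u8 error_recovery_info; u8 reserved; u32 len */
struct sec_header_model {
	uint8_t error_recovery_info;
	uint8_t reserved;
	uint32_t len;
};

static const char *eri_decode(uint8_t eri)
{
	switch (eri) {
	case ERI_NOT_VALID:		return "not valid";
	case ERI_CORR_ERROR_LOG:	return "corrected, needs logging";
	case ERI_CORR_ERROR_THRESH:	return "corrected, threshold exceeded";
	default:			return "other";
	}
}

int main(void)
{
	struct sec_header_model h = { .error_recovery_info = ERI_CORR_ERROR_LOG };

	printf("ERI 0x%02x: %s\n", h.error_recovery_info,
	       eri_decode(h.error_recovery_info));
	return 0;
}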
 
diff --git a/arch/ia64/include/asm/sn/sn_sal.h b/arch/ia64/include/asm/sn/sn_sal.h
index 57e649d..e310fc0 100644
--- a/arch/ia64/include/asm/sn/sn_sal.h
+++ b/arch/ia64/include/asm/sn/sn_sal.h
@@ -90,6 +90,8 @@
 #define  SN_SAL_SET_CPU_NUMBER			   0x02000068
 
 #define  SN_SAL_KERNEL_LAUNCH_EVENT		   0x02000069
+#define  SN_SAL_WATCHLIST_ALLOC			   0x02000070
+#define  SN_SAL_WATCHLIST_FREE			   0x02000071
 
 /*
  * Service-specific constants
@@ -1185,4 +1187,47 @@
 	SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0);
 	return rv.status;
 }
+
+union sn_watchlist_u {
+	u64     val;
+	struct {
+		u64	blade	: 16,
+			size	: 32,
+			filler	: 16;
+	};
+};
+
+static inline int
+sn_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size,
+				unsigned long *intr_mmr_offset)
+{
+	struct ia64_sal_retval rv;
+	unsigned long addr;
+	union sn_watchlist_u size_blade;
+	int watchlist;
+
+	addr = (unsigned long)mq;
+	size_blade.size = mq_size;
+	size_blade.blade = blade;
+
+	/*
+	 * bios returns watchlist number or negative error number.
+	 */
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_ALLOC, addr,
+			size_blade.val, (u64)intr_mmr_offset,
+			(u64)&watchlist, 0, 0, 0);
+	if (rv.status < 0)
+		return rv.status;
+
+	return watchlist;
+}
+
+static inline int
+sn_mq_watchlist_free(int blade, int watchlist_num)
+{
+	struct ia64_sal_retval rv;
+	ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_FREE, blade,
+			watchlist_num, 0, 0, 0, 0, 0);
+	return rv.status;
+}
 #endif /* _ASM_IA64_SN_SN_SAL_H */
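The sn_watchlist_u union packs the blade number and message-queue size into one u64 so both travel in a single SAL call argument. A small userspace sketch of the same packing, with field widths copied from the union above (bitfield layout is compiler- and ABI-dependent, which is tolerable in the kernel only because BIOS and kernel share the ia64 ABI):

#include <stdio.h>
#include <stdint.h>

union watchlist_model {
	uint64_t val;
	struct {
		uint64_t blade  : 16,
			 size   : 32,
			 filler : 16;
	} f;
};

int main(void)
{
	union watchlist_model w = { .val = 0 };

	w.f.blade = 3;			/* target blade */
	w.f.size  = 1 << 20;		/* 1 MB message queue */
	printf("packed SAL argument = 0x%016llx\n", (unsigned long long)w.val);
	return 0;
}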
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 0635015..bd7acc7 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -678,6 +678,30 @@
 	return 0;
 }
 
+int __init early_acpi_boot_init(void)
+{
+	int ret;
+
+	/*
+	 * do a partial walk of MADT to determine how many CPUs
+	 * we have including offline CPUs
+	 */
+	if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
+		printk(KERN_ERR PREFIX "Can't find MADT\n");
+		return 0;
+	}
+
+	ret = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
+		acpi_parse_lsapic, NR_CPUS);
+	if (ret < 1)
+		printk(KERN_ERR PREFIX
+		       "Error parsing MADT - no LAPIC entries\n");
+
+	return 0;
+}
+
+
+
 int __init acpi_boot_init(void)
 {
 
@@ -701,11 +725,6 @@
 		printk(KERN_ERR PREFIX
 		       "Error parsing LAPIC address override entry\n");
 
-	if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, acpi_parse_lsapic, NR_CPUS)
-	    < 1)
-		printk(KERN_ERR PREFIX
-		       "Error parsing MADT - no LAPIC entries\n");
-
 	if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0)
 	    < 0)
 		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index 031abbf..dbdb778 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -12,13 +12,11 @@
 #include <asm/machvec.h>
 #include <linux/dma-mapping.h>
 
-#include <asm/machvec.h>
 #include <asm/system.h>
 
 #ifdef CONFIG_DMAR
 
 #include <linux/kernel.h>
-#include <linux/string.h>
 
 #include <asm/page.h>
 #include <asm/iommu.h>
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index ae79117..865af27 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -359,7 +359,7 @@
 	}
 #endif
 
-#ifdef CONFIG_CRASH_KERNEL
+#ifdef CONFIG_CRASH_DUMP
 	if (reserve_elfcorehdr(&rsvd_region[n].start,
 			       &rsvd_region[n].end) == 0)
 		n++;
@@ -561,8 +561,12 @@
 #ifdef CONFIG_ACPI
 	/* Initialize the ACPI boot-time table parser */
 	acpi_table_init();
+	early_acpi_boot_init();
 # ifdef CONFIG_ACPI_NUMA
 	acpi_numa_init();
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+	prefill_possible_map();
+#endif
 	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
 		32 : cpus_weight(early_cpu_possible_map)),
 		additional_cpus > 0 ? additional_cpus : 0);
@@ -853,9 +857,6 @@
 setup_per_cpu_areas (void)
 {
 	/* start_kernel() requires this... */
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-	prefill_possible_map();
-#endif
 }
 
 /*
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index d8c5fcd..d85ba98 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -635,7 +635,6 @@
 			(min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
 #endif
 	start = GRANULEROUNDDOWN(start);
-	start = ORDERROUNDDOWN(start);
 	end = GRANULEROUNDUP(end);
 	mem_data[node].max_pfn = max(mem_data[node].max_pfn,
 				     end >> PAGE_SHIFT);
diff --git a/arch/ia64/uv/kernel/setup.c b/arch/ia64/uv/kernel/setup.c
index cf5f28a..7a5ae63 100644
--- a/arch/ia64/uv/kernel/setup.c
+++ b/arch/ia64/uv/kernel/setup.c
@@ -19,6 +19,12 @@
 
 #ifdef CONFIG_IA64_SGI_UV
 int sn_prom_type;
+long sn_partition_id;
+EXPORT_SYMBOL(sn_partition_id);
+long sn_coherency_id;
+EXPORT_SYMBOL_GPL(sn_coherency_id);
+long sn_region_size;
+EXPORT_SYMBOL(sn_region_size);
 #endif
 
 struct redir_addr {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4cf0ab1..93224b5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -957,7 +957,7 @@
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
 	depends on SMP
-	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
+	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN)
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
 	help
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 46be2fa..c2a812e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -108,9 +108,7 @@
 {
 	DECLARE_ARGS(val, low, high);
 
-	rdtsc_barrier();
 	asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
-	rdtsc_barrier();
 
 	return EAX_EDX_VAL(val, low, high);
 }
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 38ae163..9cd83a8 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -34,6 +34,8 @@
 
 static __always_inline cycles_t vget_cycles(void)
 {
+	cycles_t cycles;
+
 	/*
 	 * We only do VDSOs on TSC capable CPUs, so this shouldnt
 	 * access boot_cpu_data (which is not VDSO-safe):
@@ -42,7 +44,11 @@
 	if (!cpu_has_tsc)
 		return 0;
 #endif
-	return (cycles_t)__native_read_tsc();
+	rdtsc_barrier();
+	cycles = (cycles_t)__native_read_tsc();
+	rdtsc_barrier();
+
+	return cycles;
 }
 
 extern void tsc_init(void);
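Taken together, the msr.h and tsc.h hunks move the serializing barriers out of the low-level __native_read_tsc() and into vget_cycles(): raw TSC reads stay cheap for callers that do not need ordering, while the vsyscall path still keeps the CPU from speculating RDTSC past surrounding loads. A userspace x86-64 sketch of the fenced read, assuming LFENCE is the right fence for the CPU at hand (the kernel's rdtsc_barrier() patches in MFENCE or LFENCE through the alternatives mechanism depending on vendor):

#include <stdio.h>
#include <stdint.h>

static inline uint64_t rdtsc_fenced(void)
{
	uint32_t lo, hi;

	/* keep the TSC read from being hoisted above earlier loads */
	asm volatile("lfence" ::: "memory");
	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	/* and keep later loads from starting before it completes */
	asm volatile("lfence" ::: "memory");

	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint64_t a = rdtsc_fenced();
	uint64_t b = rdtsc_fenced();

	printf("delta = %llu cycles\n", (unsigned long long)(b - a));
	return 0;
}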
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 2ef80e3..424093b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -55,7 +55,7 @@
 	rdtscll(this_offset);
 
 	/* return the value in ns */
-	return cycles_2_ns(this_offset);
+	return __cycles_2_ns(this_offset);
 }
 
 /* We need to define a real function for sched_clock, to override the
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 0620d6d..3f1b81a 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -27,8 +27,7 @@
 static int counter_width = 32;
 
 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))
+#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
 
 #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
@@ -124,14 +123,14 @@
 static int ppro_check_ctrs(struct pt_regs * const regs,
 			   struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	for (i = 0 ; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTR_READ(low, high, msrs, i);
-		if (CTR_OVERFLOWED(low)) {
+		rdmsrl(msrs->counters[i].addr, val);
+		if (CTR_OVERFLOWED(val)) {
 			oprofile_add_sample(regs, i);
 			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 		}
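Reading the counter in one go with rdmsrl() forces the overflow mask to be 64-bit as well: with an architectural counter wider than 32 bits, '1U << (counter_width - 1)' would shift a 32-bit constant by 32 or more, which is undefined in C. A self-contained check of the corrected test:

#include <stdio.h>

int main(void)
{
	int counter_width = 40;		/* e.g. a 40-bit hardware counter */

	/* (1U << 39) shifts a 32-bit constant past its width, which is
	 * undefined behaviour; the constant must be 1ULL as in the fix */
	unsigned long long mask = 1ULL << (counter_width - 1);
	unsigned long long val  = 0x123;	/* sign bit clear */

	/* CTR_OVERFLOWED(): counters count up from a negative reset
	 * value, so a clear sign bit means the counter wrapped */
	printf("mask = 0x%llx, overflowed = %d\n", mask, !(val & mask));
	return 0;
}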
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b61534c..5e4686d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -863,15 +863,16 @@
 	if (PagePinned(virt_to_page(mm->pgd))) {
 		SetPagePinned(page);
 
+		vm_unmap_aliases();
 		if (!PageHighMem(page)) {
 			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
 			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
 				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
-		} else
+		} else {
 			/* make sure there are no stray mappings of
 			   this page */
 			kmap_flush_unused();
-			vm_unmap_aliases();
+		}
 	}
 }
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aba77b2..6889360 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -850,13 +850,16 @@
    read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
+	vm_unmap_aliases();
+
 	xen_mc_batch();
 
-	if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
-		/* re-enable interrupts for kmap_flush_unused */
+	if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
+		/* re-enable interrupts for flushing */
 		xen_mc_issue(0);
+
 		kmap_flush_unused();
-		vm_unmap_aliases();
+
 		xen_mc_batch();
 	}
 
@@ -874,7 +877,7 @@
 #else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
-	xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+	xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
 		     PT_PMD);
 #endif
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
@@ -991,7 +994,7 @@
 
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
-	xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+	xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
 		       PT_PMD);
 #endif
 
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 622350d..0cd3ad4 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1712,6 +1712,8 @@
 	else
 		tag = 0;
 
+	if (test_and_set_bit(tag, &ap->qc_allocated))
+		BUG();
 	qc = __ata_qc_from_tag(ap, tag);
 
 	qc->tag = tag;
@@ -4563,6 +4565,37 @@
 }
 
 /**
+ *	ata_qc_new - Request an available ATA command, for queueing
+ *	@ap: Port from which we request an available command structure
+ *
+ *	LOCKING:
+ *	None.
+ */
+
+static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
+{
+	struct ata_queued_cmd *qc = NULL;
+	unsigned int i;
+
+	/* no command while frozen */
+	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
+		return NULL;
+
+	/* the last tag is reserved for internal command. */
+	for (i = 0; i < ATA_MAX_QUEUE - 1; i++)
+		if (!test_and_set_bit(i, &ap->qc_allocated)) {
+			qc = __ata_qc_from_tag(ap, i);
+			break;
+		}
+
+	if (qc)
+		qc->tag = i;
+
+	return qc;
+}
+
+/**
  *	ata_qc_new_init - Request an available ATA command, and initialize it
  *	@dev: Device from whom we request an available command structure
  *	@tag: command tag
@@ -4571,20 +4604,16 @@
  *	None.
  */
 
-struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
+struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev)
 {
 	struct ata_port *ap = dev->link->ap;
 	struct ata_queued_cmd *qc;
 
-	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
-		return NULL;
-
-	qc = __ata_qc_from_tag(ap, tag);
+	qc = ata_qc_new(ap);
 	if (qc) {
 		qc->scsicmd = NULL;
 		qc->ap = ap;
 		qc->dev = dev;
-		qc->tag = tag;
 
 		ata_qc_reinit(qc);
 	}
@@ -4592,6 +4621,31 @@
 	return qc;
 }
 
+/**
+ *	ata_qc_free - free unused ata_queued_cmd
+ *	@qc: Command to complete
+ *
+ *	Designed to free unused ata_queued_cmd object
+ *	in case something prevents using it.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+void ata_qc_free(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	unsigned int tag;
+
+	WARN_ON(qc == NULL);	/* ata_qc_from_tag _might_ return NULL */
+
+	qc->flags = 0;
+	tag = qc->tag;
+	if (likely(ata_tag_valid(tag))) {
+		qc->tag = ATA_TAG_POISON;
+		clear_bit(tag, &ap->qc_allocated);
+	}
+}
+
 void __ata_qc_complete(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
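ata_qc_new() now hands out tags from the ap->qc_allocated bitmap with test_and_set_bit() instead of trusting the tag picked by the SCSI midlayer, and ata_qc_free() returns the tag with clear_bit(). A userspace model of that allocator; the helper names are hypothetical and the bit operation is non-atomic here, unlike the kernel's:

#include <stdio.h>

#define MAX_QUEUE 32		/* stand-in for ATA_MAX_QUEUE */

static unsigned long qc_allocated;

/* non-atomic stand-in for test_and_set_bit() */
static int test_and_set(int nr, unsigned long *map)
{
	unsigned long bit = 1UL << nr;
	int was_set = !!(*map & bit);

	*map |= bit;
	return was_set;
}

static int tag_alloc(void)
{
	int i;

	/* the last tag stays reserved for the internal command */
	for (i = 0; i < MAX_QUEUE - 1; i++)
		if (!test_and_set(i, &qc_allocated))
			return i;
	return -1;		/* no free tag */
}

static void tag_free(int tag)
{
	qc_allocated &= ~(1UL << tag);	/* stand-in for clear_bit() */
}

int main(void)
{
	int a = tag_alloc(), b = tag_alloc();

	printf("allocated tags %d and %d\n", a, b);	/* 0 and 1 */
	tag_free(a);
	printf("freed %d, next alloc gets %d\n", a, tag_alloc());
	return 0;
}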
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 3fa75ea..47c7afc 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -709,11 +709,7 @@
 {
 	struct ata_queued_cmd *qc;
 
-	if (cmd->request->tag != -1)
-		qc = ata_qc_new_init(dev, cmd->request->tag);
-	else
-		qc = ata_qc_new_init(dev, 0);
-
+	qc = ata_qc_new_init(dev);
 	if (qc) {
 		qc->scsicmd = cmd;
 		qc->scsidone = done;
@@ -1108,17 +1104,7 @@
 
 		depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
 		depth = min(ATA_MAX_QUEUE - 1, depth);
-
-		/*
-		 * If this device is behind a port multiplier, we have
-		 * to share the tag map between all devices on that PMP.
-		 * Set up the shared tag map here and we get automatic.
-		 */
-		if (dev->link->ap->pmp_link)
-			scsi_init_shared_tag_map(sdev->host, ATA_MAX_QUEUE - 1);
-
-		scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
-		scsi_activate_tcq(sdev, depth);
+		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
 	}
 
 	return 0;
@@ -1958,11 +1944,6 @@
 		hdr[1] |= (1 << 7);
 
 	memcpy(rbuf, hdr, sizeof(hdr));
-
-	/* if ncq, set tags supported */
-	if (ata_id_has_ncq(args->id))
-		rbuf[7] |= (1 << 1);
-
 	memcpy(&rbuf[8], "ATA     ", 8);
 	ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16);
 	ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4);
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index d3831d3..fe2839e 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -74,7 +74,7 @@
 extern void ata_force_cbl(struct ata_port *ap);
 extern u64 ata_tf_to_lba(const struct ata_taskfile *tf);
 extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf);
-extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag);
+extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev);
 extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 			   u64 block, u32 n_block, unsigned int tf_flags,
 			   unsigned int tag);
@@ -103,6 +103,7 @@
 extern int sata_down_spd_limit(struct ata_link *link);
 extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel);
 extern void ata_sg_clean(struct ata_queued_cmd *qc);
+extern void ata_qc_free(struct ata_queued_cmd *qc);
 extern void ata_qc_issue(struct ata_queued_cmd *qc);
 extern void __ata_qc_complete(struct ata_queued_cmd *qc);
 extern int atapi_check_dma(struct ata_queued_cmd *qc);
@@ -118,22 +119,6 @@
 extern void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy);
 extern void ata_lpm_schedule(struct ata_port *ap, enum link_pm);
 
-/**
- *	ata_qc_free - free unused ata_queued_cmd
- *	@qc: Command to complete
- *
- *	Designed to free unused ata_queued_cmd object
- *	in case something prevents using it.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-static inline void ata_qc_free(struct ata_queued_cmd *qc)
-{
-	qc->flags = 0;
-	qc->tag = ATA_TAG_POISON;
-}
-
 /* libata-acpi.c */
 #ifdef CONFIG_ATA_ACPI
 extern void ata_acpi_associate_sata_port(struct ata_port *ap);
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 5bed733..8504a21 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -65,12 +65,14 @@
 		return;
 	}
 
+#if 0
+	/* shows regressions, re-enable for 2.6.29 */
 	/*
 	 * run any timers that can be run now, at this point
 	 * before calculating the idle duration etc.
 	 */
 	hrtimer_peek_ahead_timers();
-
+#endif
 	/* ask the governor for the next state */
 	next_state = cpuidle_curr_governor->select(dev);
 	if (need_resched())
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 3e526b6..8daf479 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -81,9 +81,9 @@
 		const struct dmi_header *dm = (const struct dmi_header *)data;
 
 		/*
-		 *  We want to know the total length (formated area and strings)
-		 *  before decoding to make sure we won't run off the table in
-		 *  dmi_decode or dmi_string
+		 *  We want to know the total length (formatted area and
+		 *  strings) before decoding to make sure we won't run off the
+		 *  table in dmi_decode or dmi_string
 		 */
 		data += dm->length;
 		while ((data - buf < len - 1) && (data[0] || data[1]))
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index a1abf95..603ffd0 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -77,12 +77,6 @@
  *  Fusion MPT LAN private structures
  */
 
-struct NAA_Hosed {
-	u16 NAA;
-	u8 ieee[FC_ALEN];
-	struct NAA_Hosed *next;
-};
-
 struct BufferControl {
 	struct sk_buff	*skb;
 	dma_addr_t	dma;
@@ -159,11 +153,6 @@
 static u32 max_buckets_out = 127;
 static u32 tx_max_out_p = 127 - 16;
 
-#ifdef QLOGIC_NAA_WORKAROUND
-static struct NAA_Hosed *mpt_bad_naa = NULL;
-DEFINE_RWLOCK(bad_naa_lock);
-#endif
-
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
  *	lan_reply - Handle all data sent from the hardware.
@@ -780,30 +769,6 @@
 //			ctx, skb, skb->data));
 
 	mac = skb_mac_header(skb);
-#ifdef QLOGIC_NAA_WORKAROUND
-{
-	struct NAA_Hosed *nh;
-
-	/* Munge the NAA for Tx packets to QLogic boards, which don't follow
-	   RFC 2625. The longer I look at this, the more my opinion of Qlogic
-	   drops. */
-	read_lock_irq(&bad_naa_lock);
-	for (nh = mpt_bad_naa; nh != NULL; nh=nh->next) {
-		if ((nh->ieee[0] == mac[0]) &&
-		    (nh->ieee[1] == mac[1]) &&
-		    (nh->ieee[2] == mac[2]) &&
-		    (nh->ieee[3] == mac[3]) &&
-		    (nh->ieee[4] == mac[4]) &&
-		    (nh->ieee[5] == mac[5])) {
-			cur_naa = nh->NAA;
-			dlprintk ((KERN_INFO "mptlan/sdu_send: using NAA value "
-				  "= %04x.\n", cur_naa));
-			break;
-		}
-	}
-	read_unlock_irq(&bad_naa_lock);
-}
-#endif
 
 	pTrans->TransactionDetails[0] = cpu_to_le32((cur_naa         << 16) |
 						    (mac[0] <<  8) |
@@ -1572,79 +1537,6 @@
 
 	fcllc = (struct fcllc *)skb->data;
 
-#ifdef QLOGIC_NAA_WORKAROUND
-{
-	u16 source_naa = fch->stype, found = 0;
-
-	/* Workaround for QLogic not following RFC 2625 in regards to the NAA
-	   value. */
-
-	if ((source_naa & 0xF000) == 0)
-		source_naa = swab16(source_naa);
-
-	if (fcllc->ethertype == htons(ETH_P_ARP))
-	    dlprintk ((KERN_INFO "mptlan/type_trans: got arp req/rep w/ naa of "
-		      "%04x.\n", source_naa));
-
-	if ((fcllc->ethertype == htons(ETH_P_ARP)) &&
-	   ((source_naa >> 12) !=  MPT_LAN_NAA_RFC2625)){
-		struct NAA_Hosed *nh, *prevnh;
-		int i;
-
-		dlprintk ((KERN_INFO "mptlan/type_trans: ARP Req/Rep from "
-			  "system with non-RFC 2625 NAA value (%04x).\n",
-			  source_naa));
-
-		write_lock_irq(&bad_naa_lock);
-		for (prevnh = nh = mpt_bad_naa; nh != NULL;
-		     prevnh=nh, nh=nh->next) {
-			if ((nh->ieee[0] == fch->saddr[0]) &&
-			    (nh->ieee[1] == fch->saddr[1]) &&
-			    (nh->ieee[2] == fch->saddr[2]) &&
-			    (nh->ieee[3] == fch->saddr[3]) &&
-			    (nh->ieee[4] == fch->saddr[4]) &&
-			    (nh->ieee[5] == fch->saddr[5])) {
-				found = 1;
-				dlprintk ((KERN_INFO "mptlan/type_trans: ARP Re"
-					 "q/Rep w/ bad NAA from system already"
-					 " in DB.\n"));
-				break;
-			}
-		}
-
-		if ((!found) && (nh == NULL)) {
-
-			nh = kmalloc(sizeof(struct NAA_Hosed), GFP_KERNEL);
-			dlprintk ((KERN_INFO "mptlan/type_trans: ARP Req/Rep w/"
-				 " bad NAA from system not yet in DB.\n"));
-
-			if (nh != NULL) {
-				nh->next = NULL;
-				if (!mpt_bad_naa)
-					mpt_bad_naa = nh;
-				if (prevnh)
-					prevnh->next = nh;
-
-				nh->NAA = source_naa; /* Set the S_NAA value. */
-				for (i = 0; i < FC_ALEN; i++)
-					nh->ieee[i] = fch->saddr[i];
-				dlprintk ((KERN_INFO "Got ARP from %02x:%02x:%02x:%02x:"
-					  "%02x:%02x with non-compliant S_NAA value.\n",
-					  fch->saddr[0], fch->saddr[1], fch->saddr[2],
-					  fch->saddr[3], fch->saddr[4],fch->saddr[5]));
-			} else {
-				printk (KERN_ERR "mptlan/type_trans: Unable to"
-					" kmalloc a NAA_Hosed struct.\n");
-			}
-		} else if (!found) {
-			printk (KERN_ERR "mptlan/type_trans: found not"
-				" set, but nh isn't null. Evil "
-				"funkiness abounds.\n");
-		}
-		write_unlock_irq(&bad_naa_lock);
-	}
-}
-#endif
 
 	/* Strip the SNAP header from ARP packets since we don't
 	 * pass them through to the 802.2/SNAP layers.
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 0d9b2d6..f210a8e 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -216,8 +216,7 @@
 	int ret;
 	const char *type;
 
-	snprintf(card->dev.bus_id, sizeof(card->dev.bus_id),
-		 "%s:%04x", mmc_hostname(card->host), card->rca);
+	dev_set_name(&card->dev, "%s:%04x", mmc_hostname(card->host), card->rca);
 
 	switch (card->type) {
 	case MMC_TYPE_MMC:
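This and the following mmc hunks are one conversion: the fixed-size bus_id array in struct device is being phased out, so names are written with dev_set_name() and read back with dev_name() rather than via open-coded snprintf(). A userspace model of the idiom; device_model and dev_set_name_model are stand-ins, not the driver-core API:

#include <stdarg.h>
#include <stdio.h>

struct device_model {
	char name[20];		/* stand-in for the old bus_id[20] */
};

static int dev_set_name_model(struct device_model *dev, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = vsnprintf(dev->name, sizeof(dev->name), fmt, ap);
	va_end(ap);
	return ret < 0 ? ret : 0;
}

int main(void)
{
	struct device_model card;

	/* was: snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), ...) */
	dev_set_name_model(&card, "%s:%04x", "mmc0", 0x1234);
	printf("%s\n", card.name);	/* mmc0:1234 */
	return 0;
}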
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 044d84e..f7284b9 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -280,7 +280,11 @@
 			(card->host->ios.clock / 1000);
 
 		if (data->flags & MMC_DATA_WRITE)
-			limit_us = 250000;
+			/*
+			 * The limit is really 250 ms, but that is
+			 * insufficient for some crappy cards.
+			 */
+			limit_us = 300000;
 		else
 			limit_us = 100000;
 
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 6da80fd4..5e945e6 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -73,8 +73,7 @@
 	if (err)
 		goto free;
 
-	snprintf(host->class_dev.bus_id, BUS_ID_SIZE,
-		 "mmc%d", host->index);
+	dev_set_name(&host->class_dev, "mmc%d", host->index);
 
 	host->parent = dev;
 	host->class_dev.parent = dev;
@@ -121,7 +120,7 @@
 	WARN_ON((host->caps & MMC_CAP_SDIO_IRQ) &&
 		!host->ops->enable_sdio_irq);
 
-	led_trigger_register_simple(host->class_dev.bus_id, &host->led);
+	led_trigger_register_simple(dev_name(&host->class_dev), &host->led);
 
 	err = device_add(&host->class_dev);
 	if (err)
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 233d0f9..46284b5 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -239,8 +239,7 @@
 {
 	int ret;
 
-	snprintf(func->dev.bus_id, sizeof(func->dev.bus_id),
-		 "%s:%d", mmc_card_id(func->card), func->num);
+	dev_set_name(&func->dev, "%s:%d", mmc_card_id(func->card), func->num);
 
 	ret = device_add(&func->dev);
 	if (ret == 0)
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 07faf54..ad00e16 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1348,7 +1348,7 @@
 		goto fail_add_host;
 
 	dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n",
-			mmc->class_dev.bus_id,
+			dev_name(&mmc->class_dev),
 			host->dma_dev ? "" : ", no DMA",
 			(host->pdata && host->pdata->get_ro)
 				? "" : ", no WP",
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 30f64b1..4d010a9 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1733,7 +1733,7 @@
 	mmc_add_host(mmc);
 
 	printk(KERN_INFO "%s: SDHCI controller on %s [%s] using %s%s\n",
-		mmc_hostname(mmc), host->hw_name, mmc_dev(mmc)->bus_id,
+		mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)),
 		(host->flags & SDHCI_USE_ADMA)?"A":"",
 		(host->flags & SDHCI_USE_DMA)?"DMA":"PIO");
 
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index 1384484..82554dd 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -632,7 +632,7 @@
 
 	if (host->req) {
 		printk(KERN_ERR "%s : unfinished request detected\n",
-		       sock->dev.bus_id);
+		       dev_name(&sock->dev));
 		mrq->cmd->error = -ETIMEDOUT;
 		goto err_out;
 	}
@@ -672,7 +672,7 @@
 					    ? PCI_DMA_TODEVICE
 					    : PCI_DMA_FROMDEVICE)) {
 				printk(KERN_ERR "%s : scatterlist map failed\n",
-				       sock->dev.bus_id);
+				       dev_name(&sock->dev));
 				mrq->cmd->error = -ENOMEM;
 				goto err_out;
 			}
@@ -684,7 +684,7 @@
 						   : PCI_DMA_FROMDEVICE);
 			if (host->sg_len < 1) {
 				printk(KERN_ERR "%s : scatterlist map failed\n",
-				       sock->dev.bus_id);
+				       dev_name(&sock->dev));
 				tifm_unmap_sg(sock, &host->bounce_buf, 1,
 					      r_data->flags & MMC_DATA_WRITE
 					      ? PCI_DMA_TODEVICE
@@ -748,7 +748,7 @@
 
 	if (!mrq) {
 		printk(KERN_ERR " %s : no request to complete?\n",
-		       sock->dev.bus_id);
+		       dev_name(&sock->dev));
 		spin_unlock_irqrestore(&sock->lock, flags);
 		return;
 	}
@@ -789,7 +789,7 @@
 	printk(KERN_ERR
 	       "%s : card failed to respond for a long period of time "
 	       "(%x, %x)\n",
-	       host->dev->dev.bus_id, host->req->cmd->opcode, host->cmd_flags);
+	       dev_name(&host->dev->dev), host->req->cmd->opcode, host->cmd_flags);
 
 	tifm_eject(host->dev);
 }
@@ -906,7 +906,7 @@
 
 	if (rc) {
 		printk(KERN_ERR "%s : controller failed to reset\n",
-		       sock->dev.bus_id);
+		       dev_name(&sock->dev));
 		return -ENODEV;
 	}
 
@@ -933,7 +933,7 @@
 	if (rc) {
 		printk(KERN_ERR
 		       "%s : card not ready - probe failed on initialization\n",
-		       sock->dev.bus_id);
+		       dev_name(&sock->dev));
 		return -ENODEV;
 	}
 
@@ -954,7 +954,7 @@
 	if (!(TIFM_SOCK_STATE_OCCUPIED
 	      & readl(sock->addr + SOCK_PRESENT_STATE))) {
 		printk(KERN_WARNING "%s : card gone, unexpectedly\n",
-		       sock->dev.bus_id);
+		       dev_name(&sock->dev));
 		return rc;
 	}
 
diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
index d962ba0..191a320 100644
--- a/drivers/oprofile/event_buffer.c
+++ b/drivers/oprofile/event_buffer.c
@@ -105,7 +105,7 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (test_and_set_bit(0, &buffer_opened))
+	if (test_and_set_bit_lock(0, &buffer_opened))
 		return -EBUSY;
 
 	/* Register as a user of dcookies
@@ -129,7 +129,7 @@
 fail:
 	dcookie_unregister(file->private_data);
 out:
-	clear_bit(0, &buffer_opened);
+	__clear_bit_unlock(0, &buffer_opened);
 	return err;
 }
 
@@ -141,7 +141,7 @@
 	dcookie_unregister(file->private_data);
 	buffer_pos = 0;
 	atomic_set(&buffer_ready, 0);
-	clear_bit(0, &buffer_opened);
+	__clear_bit_unlock(0, &buffer_opened);
 	return 0;
 }
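buffer_opened acts as a one-bit lock: open now takes it with test_and_set_bit_lock(), which has acquire semantics, and both the error path and release drop it with __clear_bit_unlock(), which has release semantics, instead of the unordered clear_bit(). A userspace model of the same open-once gate built on GCC atomics; the kernel primitives differ in detail but carry the same acquire/release contract:

#include <stdio.h>

static char buffer_opened;

static int open_once(void)
{
	/* acquire, like test_and_set_bit_lock(0, &buffer_opened) */
	if (__atomic_test_and_set(&buffer_opened, __ATOMIC_ACQUIRE))
		return -1;	/* already open: -EBUSY */
	return 0;
}

static void release(void)
{
	/* release, like __clear_bit_unlock(0, &buffer_opened) */
	__atomic_clear(&buffer_opened, __ATOMIC_RELEASE);
}

int main(void)
{
	printf("first open:  %d\n", open_once());	/* 0 */
	printf("second open: %d\n", open_once());	/* -1 */
	release();
	printf("after close: %d\n", open_once());	/* 0 */
	return 0;
}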
 
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 110022d..5d72866 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -575,7 +575,7 @@
 
 	nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	start = vma->vm_pgoff;
-	size = pci_resource_len(pdev, resno) >> PAGE_SHIFT;
+	size = ((pci_resource_len(pdev, resno) - 1) >> PAGE_SHIFT) + 1;
 	if (start < size && size - start >= nr)
 		return 1;
 	WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n",
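The old computation truncated: a BAR smaller than one page has pci_resource_len() >> PAGE_SHIFT equal to zero, so every mmap of it was rejected. The replacement rounds the length up to whole pages. The arithmetic, checked in isolation:

#include <stdio.h>

#define PAGE_SHIFT 12		/* 4 KB pages, as on x86 */

int main(void)
{
	unsigned long lens[] = { 128, 4096, 4097, 65536 };
	int i;

	for (i = 0; i < 4; i++) {
		unsigned long len = lens[i];
		unsigned long truncated = len >> PAGE_SHIFT;
		unsigned long rounded = ((len - 1) >> PAGE_SHIFT) + 1;

		printf("len %6lu: old %lu page(s), fixed %lu page(s)\n",
		       len, truncated, rounded);
	}
	return 0;
}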
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index bbf66ea..5049a47 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -1692,24 +1692,24 @@
 	}
 }
 
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM,
-			 PCI_DEVICE_ID_NX2_5706,
-			 quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM,
-			 PCI_DEVICE_ID_NX2_5706S,
-			 quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM,
-			 PCI_DEVICE_ID_NX2_5708,
-			 quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM,
-			 PCI_DEVICE_ID_NX2_5708S,
-			 quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM,
-			 PCI_DEVICE_ID_NX2_5709,
-			 quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM,
-			 PCI_DEVICE_ID_NX2_5709S,
-			 quirk_brcm_570x_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_NX2_5706,
+			quirk_brcm_570x_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_NX2_5706S,
+			quirk_brcm_570x_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_NX2_5708,
+			quirk_brcm_570x_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_NX2_5708S,
+			quirk_brcm_570x_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_NX2_5709,
+			quirk_brcm_570x_limit_vpd);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
+			PCI_DEVICE_ID_NX2_5709S,
+			quirk_brcm_570x_limit_vpd);
 
 #ifdef CONFIG_PCI_MSI
 /* Some chipsets do not support MSI. We cannot easily rely on setting
diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index 1f5f614..132a781 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -100,7 +100,8 @@
  * pci_map_rom - map a PCI ROM to kernel space
  * @pdev: pointer to pci device struct
  * @size: pointer to receive size of pci window over ROM
- * @return: kernel virtual pointer to image of ROM
+ *
+ * Return: kernel virtual pointer to image of ROM
  *
  * Map a PCI ROM into kernel space. If ROM is boot video ROM,
  * the shadow BIOS copy will be returned instead of the
@@ -167,7 +168,8 @@
  * pci_map_rom_copy - map a PCI ROM to kernel space, create a copy
  * @pdev: pointer to pci device struct
  * @size: pointer to receive size of pci window over ROM
- * @return: kernel virtual pointer to image of ROM
+ *
+ * Return: kernel virtual pointer to image of ROM
  *
  * Map a PCI ROM into kernel space. If ROM is boot video ROM,
  * the shadow BIOS copy will be returned instead of the
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 4dada6e..39360e2 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1,6 +1,4 @@
-menu "Voltage and Current regulators"
-
-config REGULATOR
+menuconfig REGULATOR
 	bool "Voltage and Current Regulator Support"
 	default n
 	help
@@ -23,21 +21,20 @@
 
 	  If unsure, say no.
 
+if REGULATOR
+
 config REGULATOR_DEBUG
 	bool "Regulator debug support"
-	depends on REGULATOR
 	help
 	  Say yes here to enable debugging support.
 
 config REGULATOR_FIXED_VOLTAGE
 	tristate
 	default n
-	select REGULATOR
 
 config REGULATOR_VIRTUAL_CONSUMER
 	tristate "Virtual regulator consumer support"
 	default n
-	select REGULATOR
 	help
 	  This driver provides a virtual consumer for the voltage and
           current regulator API which provides sysfs controls for
@@ -49,7 +46,6 @@
 config REGULATOR_BQ24022
 	tristate "TI bq24022 Dual Input 1-Cell Li-Ion Charger IC"
 	default n
-	select REGULATOR
 	help
 	  This driver controls a TI bq24022 Charger attached via
 	  GPIOs. The provided current regulator can enable/disable
@@ -59,7 +55,6 @@
 config REGULATOR_WM8350
 	tristate "Wolfson Microelectroncis WM8350 AudioPlus PMIC"
 	depends on MFD_WM8350
-	select REGULATOR
 	help
 	  This driver provides support for the voltage and current regulators
           of the WM8350 AudioPlus PMIC.
@@ -67,7 +62,6 @@
 config REGULATOR_WM8400
 	tristate "Wolfson Microelectroncis WM8400 AudioPlus PMIC"
 	depends on MFD_WM8400
-	select REGULATOR
 	help
 	  This driver provides support for the voltage regulators of the
 	  WM8400 AudioPlus PMIC.
@@ -75,9 +69,8 @@
 config REGULATOR_DA903X
 	tristate "Support regulators on Dialog Semiconductor DA9030/DA9034 PMIC"
 	depends on PMIC_DA903X
-	select REGULATOR
 	help
 	  Say y here to support the BUCKs and LDOs regulators found on
 	  Dialog Semiconductor DA9030/DA9034 PMIC.
 
-endmenu
+endif
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index e1654f5..c95b286 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -21,7 +21,23 @@
 
 	  If in doubt, say N here.
 
-if STAGING
+
+config STAGING_EXCLUDE_BUILD
+	bool "Exclude Staging drivers from being built" if STAGING
+	default y
+	---help---
+	  Are you sure you really want to build the staging drivers?
+	  They taint your kernel, don't live up to the normal Linux
+	  kernel quality standards, are a bit crufty around the edges,
+	  and might go off and kick your dog when you aren't paying
+	  attention.
+
+	  Say N here to be able to select and build the Staging drivers.
+	  This option is primarily here to prevent them from being built
+	  when selecting 'make allyesconfig' and 'make allmodconfig' so
+	  don't be all that put off, your dog will be just fine.
+
+if !STAGING_EXCLUDE_BUILD
 
 source "drivers/staging/et131x/Kconfig"
 
@@ -45,4 +61,4 @@
 
 source "drivers/staging/poch/Kconfig"
 
-endif # STAGING
+endif # !STAGING_EXCLUDE_BUILD
diff --git a/drivers/staging/usbip/Kconfig b/drivers/staging/usbip/Kconfig
index 7426235..217fb7e 100644
--- a/drivers/staging/usbip/Kconfig
+++ b/drivers/staging/usbip/Kconfig
@@ -1,6 +1,6 @@
 config USB_IP_COMMON
 	tristate "USB IP support (EXPERIMENTAL)"
-	depends on USB && EXPERIMENTAL
+	depends on USB && NET && EXPERIMENTAL
 	default N
 	---help---
 	  This enables pushing USB packets over IP to allow remote
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fe34d74..2a117e2 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -718,6 +718,8 @@
 			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 			free = ext4_free_blocks_after_init(sb, group, gdp);
 			gdp->bg_free_blocks_count = cpu_to_le16(free);
+			gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
+								gdp);
 		}
 		spin_unlock(sb_bgl_lock(sbi, group));
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8dbf695..be21a5a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2329,6 +2329,8 @@
 			unlock_page(page);
 			return 0;
 		}
+		/* now mark the buffer_heads as dirty and uptodate */
+		block_commit_write(page, 0, PAGE_CACHE_SIZE);
 	}
 
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -4580,9 +4582,10 @@
 static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
 	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
-		return ext4_indirect_trans_blocks(inode, nrblocks, 0);
-	return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
+		return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
+	return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
 }
+
 /*
  * Account for index blocks, block groups bitmaps and block group
  * descriptor blocks if modify datablocks and index blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dfe17a1..444ad99 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4441,6 +4441,7 @@
 		else if (block >= (entry->start_blk + entry->count))
 			n = &(*n)->rb_right;
 		else {
+			ext4_unlock_group(sb, group);
 			ext4_error(sb, __func__,
 			    "Double free of blocks %d (%d %d)\n",
 			    block, entry->start_blk, entry->count);
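The added ext4_unlock_group() drops the group spinlock before ext4_error() runs, since the error handler may write the superblock or remount read-only, work that cannot happen under a spinlock. A userspace sketch of the pattern with a pthread mutex standing in for the group lock; report_error() is a hypothetical stand-in for ext4_error():

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;

/* like ext4_error(): may block, so it must not run under the lock */
static void report_error(const char *msg)
{
	usleep(1000);
	fprintf(stderr, "error: %s\n", msg);
}

static void free_blocks(int double_free)
{
	pthread_mutex_lock(&group_lock);
	if (double_free) {
		pthread_mutex_unlock(&group_lock);	/* the fix */
		report_error("double free of blocks");
		return;
	}
	/* normal path: insert the extent into the free-blocks tree */
	pthread_mutex_unlock(&group_lock);
}

int main(void)
{
	free_blocks(1);
	return 0;
}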
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 994859d..e4a241c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1458,9 +1458,8 @@
 
 	/* We allocate both existing and potentially added groups */
 	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
-			    ((sbi->s_es->s_reserved_gdt_blocks +1 ) <<
-			      EXT4_DESC_PER_BLOCK_BITS(sb))) /
-			   groups_per_flex;
+			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
+			      EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
 	sbi->s_flex_groups = kzalloc(flex_group_count *
 				     sizeof(struct flex_groups), GFP_KERNEL);
 	if (sbi->s_flex_groups == NULL) {
@@ -2885,12 +2884,9 @@
 /*
  * Ext4 always journals updates to the superblock itself, so we don't
  * have to propagate any other updates to the superblock on disk at this
- * point.  Just start an async writeback to get the buffers on their way
- * to the disk.
- *
- * This implicitly triggers the writebehind on sync().
+ * point.  (We can probably nuke this function altogether, and remove
+ * any mention of sb->s_dirt in all of fs/ext4; eventual cleanup...)
  */
-
 static void ext4_write_super(struct super_block *sb)
 {
 	if (mutex_trylock(&sb->s_lock) != 0)
@@ -2900,15 +2896,15 @@
 
 static int ext4_sync_fs(struct super_block *sb, int wait)
 {
-	tid_t target;
+	int ret = 0;
 
 	trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
 	sb->s_dirt = 0;
-	if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
-		if (wait)
-			jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
-	}
-	return 0;
+	if (wait)
+		ret = ext4_force_commit(sb);
+	else
+		jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+	return ret;
 }
 
 /*
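The flex_group_count change is more than a reflow: s_reserved_gdt_blocks is an on-disk, little-endian field, so using it without le16_to_cpu() inflates the count enormously on big-endian hosts. A quick userspace illustration of the hazard:

#include <stdio.h>
#include <stdint.h>

/* byte swap standing in for le16_to_cpu() on a big-endian CPU */
static uint16_t swab16(uint16_t x)
{
	return (uint16_t)((x >> 8) | (x << 8));
}

int main(void)
{
	uint16_t reserved_gdt_blocks = 0x0040;	/* 64, as stored on disk */

	printf("with le16_to_cpu:      %u\n", reserved_gdt_blocks);
	printf("raw load on a BE host: %u\n", swab16(reserved_gdt_blocks));
	return 0;
}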
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 1bd8d4a..61f32f3 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -115,7 +115,7 @@
  */
 void __log_wait_for_space(journal_t *journal)
 {
-	int nblocks;
+	int nblocks, space_left;
 	assert_spin_locked(&journal->j_state_lock);
 
 	nblocks = jbd_space_needed(journal);
@@ -128,25 +128,42 @@
 		/*
 		 * Test again, another process may have checkpointed while we
 		 * were waiting for the checkpoint lock. If there are no
-		 * outstanding transactions there is nothing to checkpoint and
-		 * we can't make progress. Abort the journal in this case.
+		 * transactions ready to be checkpointed, try to recover
+		 * journal space by calling cleanup_journal_tail(), and if
+		 * that doesn't work, by waiting for the currently committing
+		 * transaction to complete.  If there is absolutely no way
+		 * to make progress, this is either a BUG or corrupted
+		 * filesystem, so abort the journal and leave a stack
+		 * trace for forensic evidence.
 		 */
 		spin_lock(&journal->j_state_lock);
 		spin_lock(&journal->j_list_lock);
 		nblocks = jbd_space_needed(journal);
-		if (__log_space_left(journal) < nblocks) {
+		space_left = __log_space_left(journal);
+		if (space_left < nblocks) {
 			int chkpt = journal->j_checkpoint_transactions != NULL;
+			tid_t tid = 0;
 
+			if (journal->j_committing_transaction)
+				tid = journal->j_committing_transaction->t_tid;
 			spin_unlock(&journal->j_list_lock);
 			spin_unlock(&journal->j_state_lock);
 			if (chkpt) {
 				log_do_checkpoint(journal);
+			} else if (cleanup_journal_tail(journal) == 0) {
+				/* We were able to recover space; yay! */
+				;
+			} else if (tid) {
+				log_wait_commit(journal, tid);
 			} else {
-				printk(KERN_ERR "%s: no transactions\n",
-				       __func__);
+				printk(KERN_ERR "%s: needed %d blocks and "
+				       "only had %d space available\n",
+				       __func__, nblocks, space_left);
+				printk(KERN_ERR "%s: no way to get more "
+				       "journal space\n", __func__);
+				WARN_ON(1);
 				journal_abort(journal, 0);
 			}
-
 			spin_lock(&journal->j_state_lock);
 		} else {
 			spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 9203c33..9497718 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -116,7 +116,7 @@
  */
 void __jbd2_log_wait_for_space(journal_t *journal)
 {
-	int nblocks;
+	int nblocks, space_left;
 	assert_spin_locked(&journal->j_state_lock);
 
 	nblocks = jbd_space_needed(journal);
@@ -129,25 +129,43 @@
 		/*
 		 * Test again, another process may have checkpointed while we
 		 * were waiting for the checkpoint lock. If there are no
-		 * outstanding transactions there is nothing to checkpoint and
-		 * we can't make progress. Abort the journal in this case.
+		 * transactions ready to be checkpointed, try to recover
+		 * journal space by calling cleanup_journal_tail(), and if
+		 * that doesn't work, by waiting for the currently committing
+		 * transaction to complete.  If there is absolutely no way
+		 * to make progress, this is either a BUG or corrupted
+		 * filesystem, so abort the journal and leave a stack
+		 * trace for forensic evidence.
 		 */
 		spin_lock(&journal->j_state_lock);
 		spin_lock(&journal->j_list_lock);
 		nblocks = jbd_space_needed(journal);
-		if (__jbd2_log_space_left(journal) < nblocks) {
+		space_left = __jbd2_log_space_left(journal);
+		if (space_left < nblocks) {
 			int chkpt = journal->j_checkpoint_transactions != NULL;
+			tid_t tid = 0;
 
+			if (journal->j_committing_transaction)
+				tid = journal->j_committing_transaction->t_tid;
 			spin_unlock(&journal->j_list_lock);
 			spin_unlock(&journal->j_state_lock);
 			if (chkpt) {
 				jbd2_log_do_checkpoint(journal);
+			} else if (jbd2_cleanup_journal_tail(journal) == 0) {
+				/* We were able to recover space; yay! */
+				;
+			} else if (tid) {
+				jbd2_log_wait_commit(journal, tid);
 			} else {
-				printk(KERN_ERR "%s: no transactions\n",
-				       __func__);
+				printk(KERN_ERR "%s: needed %d blocks and "
+				       "only had %d space available\n",
+				       __func__, nblocks, space_left);
+				printk(KERN_ERR "%s: no way to get more "
+				       "journal space in %s\n", __func__,
+				       journal->j_devname);
+				WARN_ON(1);
 				jbd2_journal_abort(journal, 0);
 			}
-
 			spin_lock(&journal->j_state_lock);
 		} else {
 			spin_unlock(&journal->j_list_lock);
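The jbd and jbd2 hunks make the same change: instead of aborting as soon as no transaction is ready to checkpoint, __log_wait_for_space() now tries a fallback ladder (checkpoint, then advance the journal tail, then wait for the committing transaction) and only aborts, now with a WARN_ON stack trace, when every rung fails. The ladder reduced to a self-contained sketch, with the journal state simplified to three hypothetical booleans:

#include <stdio.h>

enum action { DO_CHECKPOINT, CLEANUP_TAIL, WAIT_COMMIT, ABORT_JOURNAL };

static enum action pick_action(int have_checkpointable,
			       int tail_can_advance,
			       int have_committing)
{
	if (have_checkpointable)
		return DO_CHECKPOINT;	/* log_do_checkpoint() */
	if (tail_can_advance)
		return CLEANUP_TAIL;	/* cleanup_journal_tail() == 0 */
	if (have_committing)
		return WAIT_COMMIT;	/* log_wait_commit(tid) */
	return ABORT_JOURNAL;		/* WARN_ON(1) + journal_abort() */
}

int main(void)
{
	printf("%d %d %d %d\n",
	       pick_action(1, 0, 0), pick_action(0, 1, 0),
	       pick_action(0, 0, 1), pick_action(0, 0, 0));
	return 0;
}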
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 783de11..e70d657 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1089,6 +1089,7 @@
 	if (!journal->j_wbuf) {
 		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
 			__func__);
+		jbd2_stats_proc_exit(journal);
 		kfree(journal);
 		return NULL;
 	}
@@ -1098,6 +1099,7 @@
 	if (err) {
 		printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
 		       __func__);
+		jbd2_stats_proc_exit(journal);
 		kfree(journal);
 		return NULL;
 	}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 848a03e..4433c8f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1875,11 +1875,11 @@
 		return -ENOMEM;
 
 	offset = *offsetp;
-	cdp->err = nfserr_eof; /* will be cleared on successful read */
 
 	while (1) {
 		unsigned int reclen;
 
+		cdp->err = nfserr_eof; /* will be cleared on successful read */
 		buf.used = 0;
 		buf.full = 0;
 
@@ -1912,9 +1912,6 @@
 			de = (struct buffered_dirent *)((char *)de + reclen);
 		}
 		offset = vfs_llseek(file, 0, SEEK_CUR);
-		cdp->err = nfserr_eof;
-		if (!buf.full)
-			break;
 	}
 
  done:
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index ae060c6..18546d8 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -34,7 +34,7 @@
 
 #define __pfn_to_page(pfn)			\
 ({	unsigned long __pfn = (pfn);		\
-	unsigned long __nid = arch_pfn_to_nid(pfn);  \
+	unsigned long __nid = arch_pfn_to_nid(__pfn);  \
 	NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\
 })
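The one-character fix closes a classic statement-expression slip: the macro copies its argument into __pfn precisely so it is evaluated once, yet passed the raw pfn to arch_pfn_to_nid() anyway, re-running any side effects in the argument. A userspace demonstration of why the copy matters (PFN_TWICE and PFN_ONCE are illustrative names):

#include <stdio.h>

static int calls;

static unsigned long next_pfn(void)
{
	calls++;
	return 42;
}

/* buggy shape: the argument is evaluated a second time */
#define PFN_TWICE(pfn) \
({	unsigned long __pfn = (pfn); \
	__pfn + (pfn) * 0; })

/* fixed shape: every later use goes through the one-time copy */
#define PFN_ONCE(pfn) \
({	unsigned long __pfn = (pfn); \
	__pfn; })

int main(void)
{
	calls = 0;
	(void)PFN_TWICE(next_pfn());
	printf("buggy macro: %d evaluations\n", calls);	/* 2 */

	calls = 0;
	(void)PFN_ONCE(next_pfn());
	printf("fixed macro: %d evaluations\n", calls);	/* 1 */
	return 0;
}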
 
diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h
index 8c0f950..7605fdd 100644
--- a/include/linux/cnt32_to_63.h
+++ b/include/linux/cnt32_to_63.h
@@ -16,6 +16,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/byteorder.h>
+#include <asm/system.h>
 
 /* this is used only to give gcc a clue about good code generation */
 union cnt32_to_63 {
@@ -53,11 +54,19 @@
  * needed increment.  And any race in updating the value in memory is harmless
  * as the same value would simply be stored more than once.
  *
- * The only restriction for the algorithm to work properly is that this
- * code must be executed at least once per each half period of the 32-bit
- * counter to properly update the state bit in memory. This is usually not a
- * problem in practice, but if it is then a kernel timer could be scheduled
- * to manage for this code to be executed often enough.
+ * The restrictions for the algorithm to work properly are:
+ *
+ * 1) this code must be called at least once per each half period of the
+ *    32-bit counter;
+ *
+ * 2) this code must not be preempted for a duration longer than the
+ *    32-bit counter half period minus the longest period between two
+ *    calls to this code.
+ *
+ * Those requirements ensure proper update to the state bit in memory.
+ * This is usually not a problem in practice, but if it is then a kernel
+ * timer should be scheduled to ensure this code is executed often
+ * enough.
  *
  * Note that the top bit (bit 63) in the returned value should be considered
  * as garbage.  It is not cleared here because callers are likely to use a
@@ -68,9 +77,10 @@
  */
 #define cnt32_to_63(cnt_lo) \
 ({ \
-	static volatile u32 __m_cnt_hi; \
+	static u32 __m_cnt_hi; \
 	union cnt32_to_63 __x; \
 	__x.hi = __m_cnt_hi; \
+	smp_rmb(); \
 	__x.lo = (cnt_lo); \
 	if (unlikely((s32)(__x.hi ^ __x.lo) < 0)) \
 		__m_cnt_hi = __x.hi = (__x.hi ^ 0x80000000) + (__x.hi >> 31); \
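The macro extends a free-running 32-bit counter to 63 bits by keeping the upper half in memory and updating it whenever bit 31 of the sampled low word disagrees with the stored state bit; this rewrite drops the useless volatile and adds smp_rmb() so the upper half is always read before the hardware counter. A single-threaded userspace model of the extension logic (the barrier is omitted because there is no concurrency here):

#include <stdio.h>
#include <stdint.h>

static uint32_t m_cnt_hi;	/* upper half plus state bit (bit 31) */

static uint64_t cnt32_to_63_model(uint32_t cnt_lo)
{
	uint32_t hi = m_cnt_hi;

	/* if the state bit no longer matches bit 31 of the counter,
	 * toggle it, carrying into the upper half on a true wraparound */
	if ((int32_t)(hi ^ cnt_lo) < 0)
		m_cnt_hi = hi = (hi ^ 0x80000000u) + (hi >> 31);

	/* bit 63 of the result is garbage, exactly as documented */
	return ((uint64_t)hi << 32) | cnt_lo;
}

int main(void)
{
	uint32_t samples[] = { 0x7fffffff, 0x80000001, 0xffffffff, 0x00000003 };
	int i;

	for (i = 0; i < 4; i++)
		printf("lo %08x -> %016llx\n", samples[i],
		       (unsigned long long)cnt32_to_63_model(samples[i]));
	return 0;
}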
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d3219d7..21e1dd4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -5,6 +5,9 @@
  * Cpumasks provide a bitmap suitable for representing the
  * set of CPU's in a system, one bit position per CPU number.
  *
+ * The new cpumask_ ops take a "struct cpumask *"; the old ones
+ * use cpumask_t.
+ *
  * See detailed comments in the file linux/bitmap.h describing the
  * data type on which these cpumasks are based.
  *
@@ -31,7 +34,7 @@
  *       will span the entire range of NR_CPUS.
  * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
  *
- * The available cpumask operations are:
+ * The obsolescent cpumask operations are:
  *
  * void cpu_set(cpu, mask)		turn on bit 'cpu' in mask
  * void cpu_clear(cpu, mask)		turn off bit 'cpu' in mask
@@ -138,7 +141,7 @@
 #include <linux/threads.h>
 #include <linux/bitmap.h>
 
-typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
+typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
 extern cpumask_t _unused_cpumask_arg_;
 
 #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
@@ -527,4 +530,556 @@
 #define for_each_online_cpu(cpu)   for_each_cpu_mask_nr((cpu), cpu_online_map)
 #define for_each_present_cpu(cpu)  for_each_cpu_mask_nr((cpu), cpu_present_map)
 
+/* These are the new versions of the cpumask operators: passed by pointer.
+ * The older versions will be implemented in terms of these, then deleted. */
+#define cpumask_bits(maskp) ((maskp)->bits)
+
+#if NR_CPUS <= BITS_PER_LONG
+#define CPU_BITS_ALL						\
+{								\
+	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD	\
+}
+
+/* This produces more efficient code. */
+#define nr_cpumask_bits	NR_CPUS
+
+#else /* NR_CPUS > BITS_PER_LONG */
+
+#define CPU_BITS_ALL						\
+{								\
+	[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,		\
+	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD		\
+}
+
+#define nr_cpumask_bits	nr_cpu_ids
+#endif /* NR_CPUS > BITS_PER_LONG */
+
+/* verify cpu argument to cpumask_* operators */
+static inline unsigned int cpumask_check(unsigned int cpu)
+{
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	WARN_ON_ONCE(cpu >= nr_cpumask_bits);
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+	return cpu;
+}
+
+#if NR_CPUS == 1
+/* Uniprocessor.  Assume all masks are "1". */
+static inline unsigned int cpumask_first(const struct cpumask *srcp)
+{
+	return 0;
+}
+
+/* Valid inputs for n are -1 and 0. */
+static inline unsigned int cpumask_next(int n, const struct cpumask *srcp)
+{
+	return n+1;
+}
+
+static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
+{
+	return n+1;
+}
+
+static inline unsigned int cpumask_next_and(int n,
+					    const struct cpumask *srcp,
+					    const struct cpumask *andp)
+{
+	return n+1;
+}
+
+/* cpu must be a valid cpu, ie 0, so there's no other choice. */
+static inline unsigned int cpumask_any_but(const struct cpumask *mask,
+					   unsigned int cpu)
+{
+	return 1;
+}
+
+#define for_each_cpu(cpu, mask)			\
+	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_and(cpu, mask, and)	\
+	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
+#else
+/**
+ * cpumask_first - get the first cpu in a cpumask
+ * @srcp: the cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if no cpus set.
+ */
+static inline unsigned int cpumask_first(const struct cpumask *srcp)
+{
+	return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_next - get the next cpu in a cpumask
+ * @n: the cpu prior to the place to search (ie. return will be > @n)
+ * @srcp: the cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if no further cpus set.
+ */
+static inline unsigned int cpumask_next(int n, const struct cpumask *srcp)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpumask_check(n);
+	return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
+}
+
+/**
+ * cpumask_next_zero - get the next unset cpu in a cpumask
+ * @n: the cpu prior to the place to search (ie. return will be > @n)
+ * @srcp: the cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if no further cpus unset.
+ */
+static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpumask_check(n);
+	return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
+}
+
+int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
+int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
+
+/**
+ * for_each_cpu - iterate over every cpu in a mask
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu(cpu, mask)				\
+	for ((cpu) = -1;				\
+		(cpu) = cpumask_next((cpu), (mask)),	\
+		(cpu) < nr_cpu_ids;)
+
+/**
+ * for_each_cpu_and - iterate over every cpu in both masks
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the first cpumask pointer
+ * @and: the second cpumask pointer
+ *
+ * This saves a temporary CPU mask in many places.  It is equivalent to:
+ *	struct cpumask tmp;
+ *	cpumask_and(&tmp, &mask, &and);
+ *	for_each_cpu(cpu, &tmp)
+ *		...
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_and(cpu, mask, and)				\
+	for ((cpu) = -1;						\
+		(cpu) = cpumask_next_and((cpu), (mask), (and)),		\
+		(cpu) < nr_cpu_ids;)
+#endif /* SMP */
+
+#define CPU_BITS_NONE						\
+{								\
+	[0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL			\
+}
+
+#define CPU_BITS_CPU0						\
+{								\
+	[0] =  1UL						\
+}
+
+/**
+ * cpumask_set_cpu - set a cpu in a cpumask
+ * @cpu: cpu number (< nr_cpu_ids)
+ * @dstp: the cpumask pointer
+ */
+static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+{
+	set_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+
+/**
+ * cpumask_clear_cpu - clear a cpu in a cpumask
+ * @cpu: cpu number (< nr_cpu_ids)
+ * @dstp: the cpumask pointer
+ */
+static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+	clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
+
+/**
+ * cpumask_test_cpu - test for a cpu in a cpumask
+ * @cpu: cpu number (< nr_cpu_ids)
+ * @cpumask: the cpumask pointer
+ *
+ * No static inline type checking - see Subtlety (1) above.
+ */
+#define cpumask_test_cpu(cpu, cpumask) \
+	test_bit(cpumask_check(cpu), (cpumask)->bits)
+
+/**
+ * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask
+ * @cpu: cpu number (< nr_cpu_ids)
+ * @cpumask: the cpumask pointer
+ *
+ * test_and_set_bit wrapper for cpumasks.
+ */
+static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
+{
+	return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask));
+}
+
+/**
+ * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
+ * @dstp: the cpumask pointer
+ */
+static inline void cpumask_setall(struct cpumask *dstp)
+{
+	bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask
+ * @dstp: the cpumask pointer
+ */
+static inline void cpumask_clear(struct cpumask *dstp)
+{
+	bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_and - *dstp = *src1p & *src2p
+ * @dstp: the cpumask result
+ * @src1p: the first input
+ * @src2p: the second input
+ */
+static inline void cpumask_and(struct cpumask *dstp,
+			       const struct cpumask *src1p,
+			       const struct cpumask *src2p)
+{
+	bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p),
+				       cpumask_bits(src2p), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_or - *dstp = *src1p | *src2p
+ * @dstp: the cpumask result
+ * @src1p: the first input
+ * @src2p: the second input
+ */
+static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
+			      const struct cpumask *src2p)
+{
+	bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p),
+				      cpumask_bits(src2p), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_xor - *dstp = *src1p ^ *src2p
+ * @dstp: the cpumask result
+ * @src1p: the first input
+ * @src2p: the second input
+ */
+static inline void cpumask_xor(struct cpumask *dstp,
+			       const struct cpumask *src1p,
+			       const struct cpumask *src2p)
+{
+	bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p),
+				       cpumask_bits(src2p), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_andnot - *dstp = *src1p & ~*src2p
+ * @dstp: the cpumask result
+ * @src1p: the first input
+ * @src2p: the second input
+ */
+static inline void cpumask_andnot(struct cpumask *dstp,
+				  const struct cpumask *src1p,
+				  const struct cpumask *src2p)
+{
+	bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p),
+					  cpumask_bits(src2p), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_complement - *dstp = ~*srcp
+ * @dstp: the cpumask result
+ * @srcp: the input to invert
+ */
+static inline void cpumask_complement(struct cpumask *dstp,
+				      const struct cpumask *srcp)
+{
+	bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp),
+					      nr_cpumask_bits);
+}
+
+/**
+ * cpumask_equal - *src1p == *src2p
+ * @src1p: the first input
+ * @src2p: the second input
+ */
+static inline bool cpumask_equal(const struct cpumask *src1p,
+				const struct cpumask *src2p)
+{
+	return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p),
+						 nr_cpumask_bits);
+}
+
+/**
+ * cpumask_intersects - (*src1p & *src2p) != 0
+ * @src1p: the first input
+ * @src2p: the second input
+ */
+static inline bool cpumask_intersects(const struct cpumask *src1p,
+				     const struct cpumask *src2p)
+{
+	return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p),
+						      nr_cpumask_bits);
+}
+
+/**
+ * cpumask_subset - (*src1p & ~*src2p) == 0
+ * @src1p: the first input
+ * @src2p: the second input
+ */
+static inline int cpumask_subset(const struct cpumask *src1p,
+				 const struct cpumask *src2p)
+{
+	return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p),
+						  nr_cpumask_bits);
+}
+
+/**
+ * cpumask_empty - *srcp == 0
+ * @srcp: the cpumask to check that all cpus (< nr_cpu_ids) are clear.
+ */
+static inline bool cpumask_empty(const struct cpumask *srcp)
+{
+	return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_full - *srcp == 0xFFFFFFFF...
+ * @srcp: the cpumask to check that all cpus (< nr_cpu_ids) are set.
+ */
+static inline bool cpumask_full(const struct cpumask *srcp)
+{
+	return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_weight - Count of bits in *srcp
+ * @srcp: the cpumask to count bits (< nr_cpu_ids) in.
+ */
+static inline unsigned int cpumask_weight(const struct cpumask *srcp)
+{
+	return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_shift_right - *dstp = *srcp >> n
+ * @dstp: the cpumask result
+ * @srcp: the input to shift
+ * @n: the number of bits to shift by
+ */
+static inline void cpumask_shift_right(struct cpumask *dstp,
+				       const struct cpumask *srcp, int n)
+{
+	bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n,
+					       nr_cpumask_bits);
+}
+
+/**
+ * cpumask_shift_left - *dstp = *srcp << n
+ * @dstp: the cpumask result
+ * @srcp: the input to shift
+ * @n: the number of bits to shift by
+ */
+static inline void cpumask_shift_left(struct cpumask *dstp,
+				      const struct cpumask *srcp, int n)
+{
+	bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n,
+					      nr_cpumask_bits);
+}
+
+/**
+ * cpumask_copy - *dstp = *srcp
+ * @dstp: the result
+ * @srcp: the input cpumask
+ */
+static inline void cpumask_copy(struct cpumask *dstp,
+				const struct cpumask *srcp)
+{
+	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits);
+}
+
+/**
+ * cpumask_any - pick a "random" cpu from *srcp
+ * @srcp: the input cpumask
+ *
+ * Returns >= nr_cpu_ids if no cpus set.
+ */
+#define cpumask_any(srcp) cpumask_first(srcp)
+
+/**
+ * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
+ * @src1p: the first input
+ * @src2p: the second input
+ *
+ * Returns >= nr_cpu_ids if no cpus set in both.  See also cpumask_next_and().
+ */
+#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p))
+
+/**
+ * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2
+ * @mask1: the first input cpumask
+ * @mask2: the second input cpumask
+ *
+ * Returns >= nr_cpu_ids if no cpus set.
+ */
+#define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2))
+
+/**
+ * cpumask_of - the cpumask containing just a given cpu
+ * @cpu: the cpu (< nr_cpu_ids)
+ */
+#define cpumask_of(cpu) (get_cpu_mask(cpu))
+
+/**
+ * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
+ * @bitmap: the bitmap
+ *
+ * There are a few places where cpumask_var_t isn't appropriate and
+ * static cpumasks must be used (eg. very early boot), yet we don't
+ * expose the definition of 'struct cpumask'.
+ *
+ * This does the conversion, and can be used as a constant initializer.
+ */
+#define to_cpumask(bitmap)						\
+	((struct cpumask *)(1 ? (bitmap)				\
+			    : (void *)sizeof(__check_is_bitmap(bitmap))))
+
+static inline int __check_is_bitmap(const unsigned long *bitmap)
+{
+	return 1;
+}
+
+/**
+ * cpumask_size - size to allocate for a 'struct cpumask' in bytes
+ *
+ * This will eventually be a runtime variable, depending on nr_cpu_ids.
+ */
+static inline size_t cpumask_size(void)
+{
+	/* FIXME: Once all cpumask assignments are eliminated, this
+	 * can be nr_cpumask_bits */
+	return BITS_TO_LONGS(NR_CPUS) * sizeof(long);
+}
+
+/*
+ * cpumask_var_t: struct cpumask for stack usage.
+ *
+ * Oh, the wicked games we play!  In order to make kernel coding a
+ * little more difficult, we typedef cpumask_var_t to an array or a
+ * pointer: doing &mask on an array is a noop, so it still works.
+ *
+ * ie.
+ *	cpumask_var_t tmpmask;
+ *	if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ *		return -ENOMEM;
+ *
+ *	  ... use 'tmpmask' like a normal struct cpumask * ...
+ *
+ *	free_cpumask_var(tmpmask);
+ */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+typedef struct cpumask *cpumask_var_t;
+
+bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
+void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
+void free_cpumask_var(cpumask_var_t mask);
+void free_bootmem_cpumask_var(cpumask_var_t mask);
+
+#else
+typedef struct cpumask cpumask_var_t[1];
+
+static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+	return true;
+}
+
+static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
+{
+}
+
+static inline void free_cpumask_var(cpumask_var_t mask)
+{
+}
+
+static inline void free_bootmem_cpumask_var(cpumask_var_t mask)
+{
+}
+#endif /* CONFIG_CPUMASK_OFFSTACK */
+
+/* The pointer versions of the maps, these will become the primary versions. */
+#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map)
+#define cpu_online_mask ((const struct cpumask *)&cpu_online_map)
+#define cpu_present_mask ((const struct cpumask *)&cpu_present_map)
+#define cpu_active_mask ((const struct cpumask *)&cpu_active_map)
+
+/* It's common to want to use cpu_all_mask in struct member initializers,
+ * so it has to refer to an address rather than a function. */
+extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
+#define cpu_all_mask to_cpumask(cpu_all_bits)
+
+/* First bits of cpu_bit_bitmap are in fact unset. */
+#define cpu_none_mask to_cpumask(cpu_bit_bitmap[0])
+
+/* Wrappers for arch boot code to manipulate normally-constant masks */
+static inline void set_cpu_possible(unsigned int cpu, bool possible)
+{
+	if (possible)
+		cpumask_set_cpu(cpu, &cpu_possible_map);
+	else
+		cpumask_clear_cpu(cpu, &cpu_possible_map);
+}
+
+static inline void set_cpu_present(unsigned int cpu, bool present)
+{
+	if (present)
+		cpumask_set_cpu(cpu, &cpu_present_map);
+	else
+		cpumask_clear_cpu(cpu, &cpu_present_map);
+}
+
+static inline void set_cpu_online(unsigned int cpu, bool online)
+{
+	if (online)
+		cpumask_set_cpu(cpu, &cpu_online_map);
+	else
+		cpumask_clear_cpu(cpu, &cpu_online_map);
+}
+
+static inline void set_cpu_active(unsigned int cpu, bool active)
+{
+	if (active)
+		cpumask_set_cpu(cpu, &cpu_active_map);
+	else
+		cpumask_clear_cpu(cpu, &cpu_active_map);
+}
+
+static inline void init_cpu_present(const struct cpumask *src)
+{
+	cpumask_copy(&cpu_present_map, src);
+}
+
+static inline void init_cpu_possible(const struct cpumask *src)
+{
+	cpumask_copy(&cpu_possible_map, src);
+}
+
+static inline void init_cpu_online(const struct cpumask *src)
+{
+	cpumask_copy(&cpu_online_map, src);
+}
 #endif /* __LINUX_CPUMASK_H */
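
For reference, a minimal sketch of how a caller is meant to combine the
new pointer-based operators with cpumask_var_t; the helper and mask names
below are invented for illustration, and the snippet assumes
<linux/cpumask.h> and <linux/slab.h>:

    /* Hypothetical helper: count online cpus that are also in 'allowed'. */
    static int count_allowed_online(const struct cpumask *allowed)
    {
            cpumask_var_t tmp;
            int cpu, n = 0;

            if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
                    return -ENOMEM;

            cpumask_and(tmp, allowed, cpu_online_mask);
            for_each_cpu(cpu, tmp)
                    n++;

            free_cpumask_var(tmp);
            return n;
    }

As the comment above for_each_cpu_and() notes, the temporary can be
avoided entirely by writing for_each_cpu_and(cpu, allowed,
cpu_online_mask) instead.
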
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c7665a4..59b0f1c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -698,6 +698,7 @@
 	unsigned int		cbl;	/* cable type; ATA_CBL_xxx */
 
 	struct ata_queued_cmd	qcmd[ATA_MAX_QUEUE];
+	unsigned long		qc_allocated;
 	unsigned int		qc_active;
 	int			nr_active_links; /* #links with active qcs */
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index ee6e822..403aa50 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -130,7 +130,7 @@
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 
 #define mmc_card_name(c)	((c)->cid.prod_name)
-#define mmc_card_id(c)		((c)->dev.bus_id)
+#define mmc_card_id(c)		(dev_name(&(c)->dev))
 
 #define mmc_list_to_card(l)	container_of(l, struct mmc_card, node)
 #define mmc_get_drvdata(c)	dev_get_drvdata(&(c)->dev)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index bde891f..f842f23 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -176,7 +176,7 @@
 
 #define mmc_dev(x)	((x)->parent)
 #define mmc_classdev(x)	(&(x)->class_dev)
-#define mmc_hostname(x)	((x)->class_dev.bus_id)
+#define mmc_hostname(x)	(dev_name(&(x)->class_dev))
 
 extern int mmc_suspend_host(struct mmc_host *, pm_message_t);
 extern int mmc_resume_host(struct mmc_host *);
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h
index 07bee4a..451bdfc 100644
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -63,7 +63,7 @@
 
 #define sdio_func_set_present(f) ((f)->state |= SDIO_STATE_PRESENT)
 
-#define sdio_func_id(f)		((f)->dev.bus_id)
+#define sdio_func_id(f)		(dev_name(&(f)->dev))
 
 #define sdio_get_drvdata(f)	dev_get_drvdata(&(f)->dev)
 #define sdio_set_drvdata(f,d)	dev_set_drvdata(&(f)->dev, d)
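
The three mmc/sdio hunks here are one mechanical conversion: the embedded
dev.bus_id string is replaced by the dev_name() accessor. A before/after
sketch of the pattern (the printk and the 'card' variable are made up for
illustration):

    /* Before: reaching into struct device directly. */
    printk(KERN_INFO "%s: card ready\n", card->dev.bus_id);

    /* After: going through the accessor, which keeps working once the
     * name storage moves out of struct device itself. */
    printk(KERN_INFO "%s: card ready\n", dev_name(&card->dev));
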
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c75b82b..feb4657 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1136,7 +1136,7 @@
 #endif
 
 #ifdef CONFIG_HAS_IOMEM
-static inline void * pci_ioremap_bar(struct pci_dev *pdev, int bar)
+static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
 {
 	/*
 	 * Make sure the BAR is actually a memory resource, not an IO resource
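
The pci.h change only adds the missing __iomem annotation to the return
type. A sketch of a caller that sparse can now type-check end to end (the
probe function, BAR number and register use are illustrative only):

    static int example_probe(struct pci_dev *pdev)
    {
            void __iomem *regs;

            regs = pci_ioremap_bar(pdev, 0);        /* map BAR 0 */
            if (!regs)
                    return -ENOMEM;

            /* readl() takes a void __iomem *; without the annotation on
             * pci_ioremap_bar() sparse could not catch a plain pointer
             * being passed here by mistake. */
            pr_info("chip id: %08x\n", readl(regs));

            iounmap(regs);
            return 0;
    }
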
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 2e4d58b..3f9a600 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -64,8 +64,17 @@
  * Call a function on all other processors
  */
 int smp_call_function(void(*func)(void *info), void *info, int wait);
+/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. */
 int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
 				int wait);
+
+static inline void smp_call_function_many(const struct cpumask *mask,
+					  void (*func)(void *info), void *info,
+					  int wait)
+{
+	smp_call_function_mask(*mask, func, info, wait);
+}
+
 int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
 				int wait);
 void __smp_call_function_single(int cpuid, struct call_single_data *data);
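
A sketch of a caller migrating from the deprecated by-value variant to the
new pointer-taking wrapper; the IPI handler and function names are
invented, and as with smp_call_function() the calling cpu is excluded and
preemption must be disabled around the call:

    static void drain_local_ipi(void *info)
    {
            /* runs on each targeted remote cpu */
    }

    static void drain_all_others(void)
    {
            preempt_disable();
            /* old style copied a whole cpumask_t by value:
             *   smp_call_function_mask(cpu_online_map, drain_local_ipi, NULL, 1);
             * new style passes a pointer instead: */
            smp_call_function_many(cpu_online_mask, drain_local_ipi, NULL, 1);
            preempt_enable();
    }
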
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 34a7ee0..117f1b7 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -99,7 +99,7 @@
 				| SD_BALANCE_FORK	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_AFFINE	\
-				| SD_WAKE_IDLE		\
+				| SD_WAKE_BALANCE	\
 				| SD_SHARE_CPUPOWER,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
@@ -120,10 +120,10 @@
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_FORK	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_AFFINE	\
+				| SD_WAKE_BALANCE	\
 				| SD_SHARE_PKG_RESOURCES\
 				| BALANCE_FOR_MC_POWER,	\
 	.last_balance		= jiffies,		\
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 89a5a12..b362911 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -240,4 +240,12 @@
 	cancel_delayed_work_sync(work);
 }
 
+#ifndef CONFIG_SMP
+static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+{
+	return fn(arg);
+}
+#else
+long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg);
+#endif /* CONFIG_SMP */
 #endif
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 7dd29b7..c29ff1d 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,6 +54,7 @@
         atomic_long_t           inflight;
         spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
+	unsigned int		gc_maybe_cycle : 1;
         wait_queue_head_t       peer_wait;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
diff --git a/include/sound/core.h b/include/sound/core.h
index 35424a9..1508c4e 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -385,9 +385,13 @@
 
 #else /* !CONFIG_SND_DEBUG */
 
-#define snd_printd(fmt, args...)	/* nothing */
-#define snd_BUG()			/* nothing */
-#define snd_BUG_ON(cond)	({/*(void)(cond);*/ 0;})  /* always false */
+#define snd_printd(fmt, args...)	do { } while (0)
+#define snd_BUG()			do { } while (0)
+static inline int __snd_bug_on(void)
+{
+	return 0;
+}
+#define snd_BUG_ON(cond)		__snd_bug_on()  /* always false */
 
 #endif /* CONFIG_SND_DEBUG */
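
The new stubs matter because an empty macro expansion leaves a bare ';'
behind. A short illustration, not from this patch, of why the
do { } while (0) form is the safe one:

    /* With the old definition of snd_printd() expanding to nothing,
     * this became 'if (err) ;' and gcc -Wextra warned about the empty
     * body.  The do { } while (0) stub always parses as exactly one
     * statement, so the warning (and any dangling-else surprise in
     * larger macros) goes away. */
    if (err)
            snd_printd("error %d\n", err);
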
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 86d4904..5a732c5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -499,3 +499,6 @@
 #endif
 };
 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
+
+const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
+EXPORT_SYMBOL(cpu_all_bits);
diff --git a/kernel/sched.c b/kernel/sched.c
index 82cc839..57c933f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6877,15 +6877,17 @@
 	struct sched_domain *tmp;
 
 	/* Remove the sched domains which do not contribute to scheduling. */
-	for (tmp = sd; tmp; tmp = tmp->parent) {
+	for (tmp = sd; tmp; ) {
 		struct sched_domain *parent = tmp->parent;
 		if (!parent)
 			break;
+
 		if (sd_parent_degenerate(tmp, parent)) {
 			tmp->parent = parent->parent;
 			if (parent->parent)
 				parent->parent->child = tmp;
-		}
+		} else
+			tmp = tmp->parent;
 	}
 
 	if (sd && sd_degenerate(sd)) {
@@ -7674,6 +7676,7 @@
 error:
 	free_sched_groups(cpu_map, tmpmask);
 	SCHED_CPUMASK_FREE((void *)allmasks);
+	kfree(rd);
 	return -ENOMEM;
 #endif
 }
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f928f2a..d4dc69d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -970,6 +970,51 @@
 	return ret;
 }
 
+#ifdef CONFIG_SMP
+struct work_for_cpu {
+	struct work_struct work;
+	long (*fn)(void *);
+	void *arg;
+	long ret;
+};
+
+static void do_work_for_cpu(struct work_struct *w)
+{
+	struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
+
+	wfc->ret = wfc->fn(wfc->arg);
+}
+
+/**
+ * work_on_cpu - run a function in user context on a particular cpu
+ * @cpu: the cpu to run on
+ * @fn: the function to run
+ * @arg: the function arg
+ *
+ * This will return -EINVAL if the cpu is not online, or the return value
+ * of @fn otherwise.
+ */
+long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+{
+	struct work_for_cpu wfc;
+
+	INIT_WORK(&wfc.work, do_work_for_cpu);
+	wfc.fn = fn;
+	wfc.arg = arg;
+	get_online_cpus();
+	if (unlikely(!cpu_online(cpu)))
+		wfc.ret = -EINVAL;
+	else {
+		schedule_work_on(cpu, &wfc.work);
+		flush_work(&wfc.work);
+	}
+	put_online_cpus();
+
+	return wfc.ret;
+}
+EXPORT_SYMBOL_GPL(work_on_cpu);
+#endif /* CONFIG_SMP */
+
 void __init init_workqueues(void)
 {
 	cpu_populated_map = cpu_online_map;
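
A hedged usage sketch for work_on_cpu(); the sampling function and the
values involved are invented for illustration:

    static long read_node_temp(void *arg)
    {
            /* runs in process context on the cpu chosen below */
            return *(long *)arg;    /* stand-in for a real per-cpu read */
    }

    static long sample_cpu(unsigned int cpu)
    {
            long dummy = 42;

            /* -EINVAL if @cpu is offline, else read_node_temp()'s
             * return value */
            return work_on_cpu(cpu, read_node_temp, &dummy);
    }
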
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 5f97dc2..8d03f22 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -2,6 +2,7 @@
 #include <linux/bitops.h>
 #include <linux/cpumask.h>
 #include <linux/module.h>
+#include <linux/bootmem.h>
 
 int __first_cpu(const cpumask_t *srcp)
 {
@@ -35,3 +36,81 @@
 	return cpu;
 }
 EXPORT_SYMBOL(__any_online_cpu);
+
+/**
+ * cpumask_next_and - get the next cpu in *src1p & *src2p
+ * @n: the cpu prior to the place to search (ie. return will be > @n)
+ * @src1p: the first cpumask pointer
+ * @src2p: the second cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if no further cpus set in both.
+ */
+int cpumask_next_and(int n, const struct cpumask *src1p,
+		     const struct cpumask *src2p)
+{
+	while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
+		if (cpumask_test_cpu(n, src2p))
+			break;
+	return n;
+}
+EXPORT_SYMBOL(cpumask_next_and);
+
+/**
+ * cpumask_any_but - return a "random" in a cpumask, but not this one.
+ * @mask: the cpumask to search
+ * @cpu: the cpu to ignore.
+ *
+ * Often used to find any cpu but smp_processor_id() in a mask.
+ * Returns >= nr_cpu_ids if no cpus set.
+ */
+int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
+{
+	unsigned int i;
+
+	cpumask_check(cpu);
+	for_each_cpu(i, mask)
+		if (i != cpu)
+			break;
+	return i;
+}
+
+/* These are not inline because of header tangles. */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+	if (likely(slab_is_available()))
+		*mask = kmalloc(cpumask_size(), flags);
+	else {
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+		printk(KERN_ERR
+			"=> alloc_cpumask_var: kmalloc not available!\n");
+		dump_stack();
+#endif
+		*mask = NULL;
+	}
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	if (!*mask) {
+		printk(KERN_ERR "=> alloc_cpumask_var: failed!\n");
+		dump_stack();
+	}
+#endif
+	return *mask != NULL;
+}
+EXPORT_SYMBOL(alloc_cpumask_var);
+
+void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
+{
+	*mask = alloc_bootmem(cpumask_size());
+}
+
+void free_cpumask_var(cpumask_var_t mask)
+{
+	kfree(mask);
+}
+EXPORT_SYMBOL(free_cpumask_var);
+
+void __init free_bootmem_cpumask_var(cpumask_var_t mask)
+{
+	free_bootmem((unsigned long)mask, cpumask_size());
+}
+#endif
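
cpumask_any_but() is typically used just as its comment says; a made-up
sketch, assuming the caller has preemption disabled so that
smp_processor_id() is stable:

    static unsigned int pick_other_cpu(void)
    {
            unsigned int target = cpumask_any_but(cpu_online_mask,
                                                  smp_processor_id());

            if (target >= nr_cpu_ids)
                    target = smp_processor_id();    /* no other cpu online */
            return target;
    }
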
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 66fad3f..ba6b0f5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -592,6 +592,8 @@
 
 #define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
 
+static bool vmap_initialized __read_mostly = false;
+
 struct vmap_block_queue {
 	spinlock_t lock;
 	struct list_head free;
@@ -828,6 +830,9 @@
 	int cpu;
 	int flush = 0;
 
+	if (unlikely(!vmap_initialized))
+		return;
+
 	for_each_possible_cpu(cpu) {
 		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
 		struct vmap_block *vb;
@@ -942,6 +947,8 @@
 		INIT_LIST_HEAD(&vbq->dirty);
 		vbq->nr_dirty = 0;
 	}
+
+	vmap_initialized = true;
 }
 
 void unmap_kernel_range(unsigned long addr, unsigned long size)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4d3c607..eb90f77 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1302,14 +1302,23 @@
 	sock_wfree(skb);
 }
 
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection.  Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
 	for (i=scm->fp->count-1; i>=0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	UNIXCB(skb).fp = scm->fp;
 	skb->destructor = unix_destruct_fds;
-	scm->fp = NULL;
+	return 0;
 }
 
 /*
@@ -1368,8 +1377,11 @@
 		goto out;
 
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp)
-		unix_attach_fds(siocb->scm, skb);
+	if (siocb->scm->fp) {
+		err = unix_attach_fds(siocb->scm, skb);
+		if (err)
+			goto out_free;
+	}
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1538,8 +1550,13 @@
 		size = min_t(int, size, skb_tailroom(skb));
 
 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-		if (siocb->scm->fp)
-			unix_attach_fds(siocb->scm, skb);
+		if (siocb->scm->fp) {
+			err = unix_attach_fds(siocb->scm, skb);
+			if (err) {
+				kfree_skb(skb);
+				goto out_err;
+			}
+		}
 
 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 			kfree_skb(skb);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a27b84..6d4a9a8 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@
 				 */
 				struct sock *sk = unix_get_socket(*fp++);
 				if (sk) {
-					hit = true;
-					func(unix_sk(sk));
+					struct unix_sock *u = unix_sk(sk);
+
+					/*
+					 * Ignore non-candidates; they could
+					 * have been added to the queues after
+					 * starting the garbage collection
+					 */
+					if (u->gc_candidate) {
+						hit = true;
+						func(u);
+					}
 				}
 			}
 			if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@
 {
 	atomic_long_inc(&u->inflight);
 	/*
-	 * If this is still a candidate, move it to the end of the
-	 * list, so that it's checked even if it was already passed
-	 * over
+	 * If this still might be part of a cycle, move it to the end
+	 * of the list, so that it's checked even if it was already
+	 * passed over
 	 */
-	if (u->gc_candidate)
+	if (u->gc_maybe_cycle)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -267,6 +276,7 @@
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
 	struct list_head cursor;
+	LIST_HEAD(not_cycle_list);
 
 	spin_lock(&unix_gc_lock);
 
@@ -282,10 +292,14 @@
 	 *
 	 * Holding unix_gc_lock will protect these candidates from
 	 * being detached, and hence from gaining an external
-	 * reference.  This also means, that since there are no
-	 * possible receivers, the receive queues of these sockets are
-	 * static during the GC, even though the dequeue is done
-	 * before the detach without atomicity guarantees.
+	 * reference.  Since there are no possible receivers, all
+	 * buffers currently on the candidates' queues stay there
+	 * during the garbage collection.
+	 *
+	 * We also know that no new candidate can be added onto the
+	 * receive queues.  Other, non-candidate sockets _can_ be
+	 * added to the queue, so we must make sure only to touch
+	 * candidates.
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 		long total_refs;
@@ -299,6 +313,7 @@
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
 			u->gc_candidate = 1;
+			u->gc_maybe_cycle = 1;
 		}
 	}
 
@@ -325,14 +340,24 @@
 		list_move(&cursor, &u->link);
 
 		if (atomic_long_read(&u->inflight) > 0) {
-			list_move_tail(&u->link, &gc_inflight_list);
-			u->gc_candidate = 0;
+			list_move_tail(&u->link, &not_cycle_list);
+			u->gc_maybe_cycle = 0;
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
 	list_del(&cursor);
 
 	/*
+	 * not_cycle_list contains those sockets which do not make up a
+	 * cycle.  Restore these to the inflight list.
+	 */
+	while (!list_empty(&not_cycle_list)) {
+		u = list_entry(not_cycle_list.next, struct unix_sock, link);
+		u->gc_candidate = 0;
+		list_move_tail(&u->link, &gc_inflight_list);
+	}
+
+	/*
 	 * Now gc_candidates contains only garbage.  Restore original
 	 * inflight counters for these as well, and remove the skbuffs
 	 * which are creating the cycle(s).
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index ba6bf5d..1264b8e 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -15,15 +15,18 @@
 version=$KERNELRELEASE
 revision=`cat .version`
 tmpdir="$objtree/debian/tmp"
+fwdir="$objtree/debian/fwtmp"
 packagename=linux-$version
+fwpackagename=linux-firmware-image
 
 if [ "$ARCH" == "um" ] ; then
 	packagename=user-mode-linux-$version
 fi
 
 # Setup the directory structure
-rm -rf "$tmpdir"
+rm -rf "$tmpdir" "$fwdir"
 mkdir -p "$tmpdir/DEBIAN" "$tmpdir/lib" "$tmpdir/boot"
+mkdir -p "$fwdir/DEBIAN" "$fwdir/lib"
 if [ "$ARCH" == "um" ] ; then
 	mkdir -p "$tmpdir/usr/lib/uml/modules/$version" "$tmpdir/usr/share/doc/$packagename" "$tmpdir/usr/bin"
 fi
@@ -107,6 +110,7 @@
 
 Package: $packagename
 Provides: kernel-image-$version, linux-image-$version
+Suggests: $fwpackagename
 Architecture: any
 Description: Linux kernel, version $version
  This package contains the Linux kernel, modules and corresponding other
@@ -118,8 +122,24 @@
 chown -R root:root "$tmpdir"
 chmod -R go-w "$tmpdir"
 
+# Do we have firmware? Move it out of the way and build it into a package.
+if [ -e "$tmpdir/lib/firmware" ]; then
+	mv "$tmpdir/lib/firmware" "$fwdir/lib/"
+
+	cat <<EOF >> debian/control
+
+Package: $fwpackagename
+Architecture: all
+Description: Linux kernel firmware, version $version
+ This package contains firmware from the Linux kernel, version $version
+EOF
+
+	dpkg-gencontrol -isp -p$fwpackagename -P"$fwdir"
+	dpkg --build "$fwdir" ..
+fi
+
 # Perform the final magic
-dpkg-gencontrol -isp
+dpkg-gencontrol -isp -p$packagename
 dpkg --build "$tmpdir" ..
 
 exit 0
diff --git a/security/keys/internal.h b/security/keys/internal.h
index b39f5c2..239098f 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -107,6 +107,7 @@
 
 extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
 
+extern int install_user_keyrings(struct task_struct *tsk);
 extern int install_thread_keyring(struct task_struct *tsk);
 extern int install_process_keyring(struct task_struct *tsk);
 
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 5be6d01..45b240a 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -40,7 +40,7 @@
 /*
  * install user and user session keyrings for a particular UID
  */
-static int install_user_keyrings(struct task_struct *tsk)
+int install_user_keyrings(struct task_struct *tsk)
 {
 	struct user_struct *user = tsk->user;
 	struct key *uid_keyring, *session_keyring;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index ba32ca6..abea08f 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -74,6 +74,10 @@
 
 	kenter("{%d},{%d},%s", key->serial, authkey->serial, op);
 
+	ret = install_user_keyrings(tsk);
+	if (ret < 0)
+		goto error_alloc;
+
 	/* allocate a new session keyring */
 	sprintf(desc, "_req.%u", key->serial);
 
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index 660beb4..ce0aa04 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -211,7 +211,7 @@
 
 config SND_GUSEXTREME
 	tristate "Gravis UltraSound Extreme"
-	select SND_HWDEP
+	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_PCM
 	help
diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c
index 743d779..c39af98 100644
--- a/sound/pci/hda/hda_proc.c
+++ b/sound/pci/hda/hda_proc.c
@@ -483,6 +483,8 @@
 		    (gpio & AC_GPIO_UNSOLICITED) ? 1 : 0,
 		    (gpio & AC_GPIO_WAKE) ? 1 : 0);
 	max = gpio & AC_GPIO_IO_COUNT;
+	if (!max || max > 8)
+		return;
 	enable = snd_hda_codec_read(codec, nid, 0,
 				    AC_VERB_GET_GPIO_MASK, 0);
 	direction = snd_hda_codec_read(codec, nid, 0,
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index d3fd432..686c774 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -3861,6 +3861,8 @@
 static struct snd_pci_quirk ad1884a_cfg_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x3030, "HP", AD1884A_MOBILE),
 	SND_PCI_QUIRK(0x103c, 0x3056, "HP", AD1884A_MOBILE),
+	SND_PCI_QUIRK(0x103c, 0x30e7, "HP EliteBook 8530p", AD1884A_LAPTOP),
+	SND_PCI_QUIRK(0x103c, 0x3614, "HP 6730s", AD1884A_LAPTOP),
 	SND_PCI_QUIRK(0x17aa, 0x20ac, "Thinkpad X300", AD1884A_THINKPAD),
 	{}
 };
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a4666c9..a378c01 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8469,6 +8469,7 @@
 	SND_PCI_QUIRK(0x17aa, 0x3bfd, "Lenovo NB0763", ALC883_LENOVO_NB0763),
 	SND_PCI_QUIRK(0x17aa, 0x101d, "Lenovo Sky", ALC888_LENOVO_SKY),
 	SND_PCI_QUIRK(0x17c0, 0x4071, "MEDION MD2", ALC883_MEDION_MD2),
+	SND_PCI_QUIRK(0x17c0, 0x4085, "MEDION MD96630", ALC888_LENOVO_MS7195_DIG),
 	SND_PCI_QUIRK(0x17f2, 0x5000, "Albatron KI690-AM2", ALC883_6ST_DIG),
 	SND_PCI_QUIRK(0x1991, 0x5625, "Haier W66", ALC883_HAIER_W66),
 	SND_PCI_QUIRK(0x8086, 0x0001, "DG33BUC", ALC883_3ST_6ch_INTEL),
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index d723543..736246f 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -4548,11 +4548,20 @@
 {
 	struct hdsp *hdsp = (struct hdsp *)hw->private_data;	
 	void __user *argp = (void __user *)arg;
+	int err;
 
 	switch (cmd) {
 	case SNDRV_HDSP_IOCTL_GET_PEAK_RMS: {
 		struct hdsp_peak_rms __user *peak_rms = (struct hdsp_peak_rms __user *)arg;
 
+		err = hdsp_check_for_iobox(hdsp);
+		if (err < 0)
+			return err;
+
+		err = hdsp_check_for_firmware(hdsp, 1);
+		if (err < 0)
+			return err;
+
 		if (!(hdsp->state & HDSP_FirmwareLoaded)) {
 			snd_printk(KERN_ERR "Hammerfall-DSP: firmware needs to be uploaded to the card.\n");
 			return -EINVAL;
@@ -4572,10 +4581,14 @@
 		unsigned long flags;
 		int i;
 		
-		if (!(hdsp->state & HDSP_FirmwareLoaded)) {
-			snd_printk(KERN_ERR "Hammerfall-DSP: Firmware needs to be uploaded to the card.\n");	
-			return -EINVAL;
-		}
+		err = hdsp_check_for_iobox(hdsp);
+		if (err < 0)
+			return err;
+
+		err = hdsp_check_for_firmware(hdsp, 1);
+		if (err < 0)
+			return err;
+
 		spin_lock_irqsave(&hdsp->lock, flags);
 		info.pref_sync_ref = (unsigned char)hdsp_pref_sync_ref(hdsp);
 		info.wordclock_sync_check = (unsigned char)hdsp_wc_sync_check(hdsp);
@@ -5045,6 +5058,10 @@
 		/* we wait 2 seconds to let freshly inserted cardbus cards do their hardware init */
 		ssleep(2);
 
+		err = hdsp_check_for_iobox(hdsp);
+		if (err < 0)
+			return err;
+
 		if ((hdsp_read (hdsp, HDSP_statusRegister) & HDSP_DllError) != 0) {
 #ifdef HDSP_FW_LOADER
 			if ((err = hdsp_request_fw_loader(hdsp)) < 0)
@@ -5057,7 +5074,7 @@
 				/* init is complete, we return */
 				return 0;
 #endif
-			/* no iobox connected, we defer initialization */
+			/* we defer initialization */
 			snd_printk(KERN_INFO "Hammerfall-DSP: card initialization pending : waiting for firmware\n");
 			if ((err = snd_hdsp_create_hwdep(card, hdsp)) < 0)
 				return err;