Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel:
  sched: add arch_update_cpu_topology hook.
  sched: add exported arch_reinit_sched_domains() to header file.
  sched: remove double unlikely from schedule()
  sched: cleanup old and rarely used 'debug' features.
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 608152a..00df126 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -18,6 +18,7 @@
 #include <linux/pci.h>
 #include <linux/bitops.h>
 #include <linux/ioport.h>
+#include <linux/suspend.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/gart.h>
@@ -76,6 +77,8 @@
 	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
 			aper_size >> 10, __pa(p));
 	insert_aperture_resource((u32)__pa(p), aper_size);
+	register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
+				(u32)__pa(p+aper_size) >> PAGE_SHIFT);
 
 	return (u32)__pa(p);
 }
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index be83336..a6450b3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -711,7 +711,8 @@
 		trim_size = end_pfn;
 		trim_size <<= PAGE_SHIFT;
 		trim_size -= trim_start;
-		add_memory_region(trim_start, trim_size, E820_RESERVED);
+		update_memory_range(trim_start, trim_size, E820_RAM,
+					E820_RESERVED);
 		update_e820();
 		return 1;
 	}
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 4e16ef4..80444c5 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -749,6 +749,32 @@
 	return 0;
 }
 early_param("memmap", parse_memmap);
+/* Convert the old_type parts of [start, start + size) to new_type. */
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type)
+{
+	u64 end = start + size;
+	int i;
+
+	BUG_ON(old_type == new_type);
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 ei_end = ei->addr + ei->size;
+		u64 lo = max(start, ei->addr);	/* overlap start */
+		u64 hi = min(end, ei_end);	/* overlap end */
+
+		if (ei->type != old_type)
+			continue;
+		/* entry lies entirely inside the range: retype it in place */
+		if (ei->addr >= start && ei_end <= end) {
+			ei->type = new_type;
+			continue;
+		}
+		/* partial overlap: add only the intersection as new_type */
+		if (lo < hi)
+			add_memory_region(lo, hi - lo, new_type);
+	}
+}
 void __init update_e820(void)
 {
 	u8 nr_map;
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 9f65b4c..9be6971 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -744,6 +744,33 @@
 	}
 }
 
+/* Convert the old_type parts of [start, start + size) to new_type. */
+void __init update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type)
+{
+	u64 end = start + size;
+	int i;
+
+	BUG_ON(old_type == new_type);
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		u64 ei_end = ei->addr + ei->size;
+		u64 lo = max(start, ei->addr);	/* overlap start */
+		u64 hi = min(end, ei_end);	/* overlap end */
+
+		if (ei->type != old_type)
+			continue;
+		/* entry lies entirely inside the range: retype it in place */
+		if (ei->addr >= start && ei_end <= end) {
+			ei->type = new_type;
+			continue;
+		}
+		/* partial overlap: add only the intersection as new_type */
+		if (lo < hi)
+			add_memory_region(lo, hi - lo, new_type);
+	}
+}
+
 void __init update_e820(void)
 {
 	u8 nr_map;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fd8ca53..74d87ea 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -657,7 +657,7 @@
 	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
 
 fault_msg:
-	.ascii								\
+	.asciz								\
 /* fault info: */	"BUG: Int %d: CR2 %p\n"				\
 /* pusha regs: */	"     EDI %p  ESI %p  EBP %p  ESP %p\n"		\
 			"     EBX %p  EDX %p  ECX %p  EAX %p\n"		\
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index a82473d..8bc1e18 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -8,6 +8,8 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <linux/dmar.h>
+#include <linux/bootmem.h>
+#include <asm/proto.h>
 #include <asm/io.h>
 #include <asm/gart.h>
 #include <asm/calgary.h>
@@ -53,11 +55,6 @@
 	int node;
 
 	node = dev_to_node(dev);
-	if (node == -1)
-		node = numa_node_id();
-
-	if (node < first_node(node_online_map))
-		node = first_node(node_online_map);
 
 	page = alloc_pages_node(node, gfp, order);
 	return page ? page_address(page) : NULL;
@@ -291,8 +288,55 @@
 }
 early_param("iommu", iommu_setup);
 
+static void *dma32_bootmem_ptr __initdata;	/* NULL when nothing is held */
+static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
+/* "dma32_size=" handler: -EINVAL without a value, else store the size. */
+static int __init parse_dma32_size_opt(char *p)
+{
+	if (p == NULL)
+		return -EINVAL;
+	dma32_bootmem_size = memparse(p, &p);
+	return 0;
+}
+early_param("dma32_size", parse_dma32_size_opt);
+
+/* Early bootmem reservation, released again by dma32_free_bootmem(). */
+void __init dma32_reserve_bootmem(void)
+{
+	const unsigned long align = 64ULL << 20;
+	unsigned long size = round_up(dma32_bootmem_size, align);
+
+	/* nothing to do unless end_pfn is above MAX_DMA32_PFN */
+	if (end_pfn <= MAX_DMA32_PFN)
+		return;
+
+	dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
+						    __pa(MAX_DMA_ADDRESS));
+	/* remember the rounded size actually held, or 0 on failure */
+	dma32_bootmem_size = dma32_bootmem_ptr ? size : 0;
+}
+/* Hand back the range taken by dma32_reserve_bootmem(), if one is held. */
+static void __init dma32_free_bootmem(void)
+{
+	int nid;
+
+	if (end_pfn <= MAX_DMA32_PFN || !dma32_bootmem_ptr)
+		return;
+
+	/* the same physical range is passed to every online node */
+	for_each_online_node(nid)
+		free_bootmem_node(NODE_DATA(nid), __pa(dma32_bootmem_ptr),
+				  dma32_bootmem_size);
+
+	/* nothing is held any more: clear the bookkeeping */
+	dma32_bootmem_ptr = NULL;
+	dma32_bootmem_size = 0;
+}
+
 void __init pci_iommu_alloc(void)
 {
+	/* free the reserved range so the iommu can get memory below 4G */
+	dma32_free_bootmem();
 	/*
 	 * The order of these functions is important for
 	 * fall-back/fail-over reasons
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index c47208f..d89a648 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -363,6 +363,8 @@
 			nvidia_force_enable_hpet);
 
 /* LPC bridges */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0260,
+			nvidia_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360,
 			nvidia_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 55ceb8c..484c4a8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -152,6 +152,24 @@
 			DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
 		},
 	},
+	{       /* Handle problems with rebooting on Dell Optiplex 745's DFF */
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 745",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+			DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
+		},
+	},
+	{       /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 745",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+			DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
+		},
+	},
 	{	/* Handle problems with rebooting on Dell 2400's */
 		.callback = set_bios_reboot,
 		.ident = "Dell PowerEdge 2400",
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 309366f..e24c456 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -142,14 +142,16 @@
 	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
 	for_each_cpu_mask (i, cpu_possible_map) {
 		char *ptr;
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+		ptr = alloc_bootmem_pages(size);
+#else
+		int node = early_cpu_to_node(i);
 
-		if (!NODE_DATA(early_cpu_to_node(i))) {
-			printk("cpu with no node %d, num_online_nodes %d\n",
-			       i, num_online_nodes());
+		if (!node_online(node) || !NODE_DATA(node))
 			ptr = alloc_bootmem_pages(size);
-		} else { 
-			ptr = alloc_bootmem_pages_node(NODE_DATA(early_cpu_to_node(i)), size);
-		}
+		else
+			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+#endif
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
 		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 7637dc9..a775fe3 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -389,6 +389,8 @@
 
 	early_res_to_bootmem();
 
+	dma32_reserve_bootmem();
+
 #ifdef CONFIG_ACPI_SLEEP
 	/*
 	 * Reserve low memory region for sleep support.
diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c
index 843b67a..bfac6ba 100644
--- a/arch/x86/mach-visws/traps.c
+++ b/arch/x86/mach-visws/traps.c
@@ -46,8 +46,9 @@
 	 */
 	set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
 	setup_local_APIC();
-	printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n",
-		apic_read(APIC_LVR), apic_read(APIC_ID));
+	printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
+		(unsigned int)apic_read(APIC_LVR),
+		(unsigned int)apic_read(APIC_ID));
 
 	set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
 	set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 8ccfee1..16b82ad 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -221,8 +221,7 @@
 				 bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
 	if (bootmap == NULL)  {
 		if (nodedata_phys < start || nodedata_phys >= end)
-			free_bootmem((unsigned long)node_data[nodeid],
-				     pgdat_size);
+			free_bootmem(nodedata_phys, pgdat_size);
 		node_data[nodeid] = NULL;
 		return;
 	}
diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h
index cea1dae..959fad0 100644
--- a/include/asm-x86/cmpxchg_32.h
+++ b/include/asm-x86/cmpxchg_32.h
@@ -269,22 +269,26 @@
 ({									\
 	__typeof__(*(ptr)) __ret;					\
 	if (likely(boot_cpu_data.x86 > 3))				\
-		__ret = __cmpxchg((ptr), (unsigned long)(o),		\
-					(unsigned long)(n), sizeof(*(ptr))); \
+		__ret = (__typeof__(*(ptr)))__cmpxchg((ptr),		\
+				(unsigned long)(o), (unsigned long)(n),	\
+				sizeof(*(ptr)));			\
 	else								\
-		__ret = cmpxchg_386((ptr), (unsigned long)(o),		\
-					(unsigned long)(n), sizeof(*(ptr))); \
+		__ret = (__typeof__(*(ptr)))cmpxchg_386((ptr),		\
+				(unsigned long)(o), (unsigned long)(n),	\
+				sizeof(*(ptr)));			\
 	__ret;								\
 })
 #define cmpxchg_local(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) __ret;					\
 	if (likely(boot_cpu_data.x86 > 3))				\
-		__ret = __cmpxchg_local((ptr), (unsigned long)(o),	\
-					(unsigned long)(n), sizeof(*(ptr))); \
+		__ret = (__typeof__(*(ptr)))__cmpxchg_local((ptr),	\
+				(unsigned long)(o), (unsigned long)(n),	\
+				sizeof(*(ptr)));			\
 	else								\
-		__ret = cmpxchg_386((ptr), (unsigned long)(o),		\
-					(unsigned long)(n), sizeof(*(ptr))); \
+		__ret = (__typeof__(*(ptr)))cmpxchg_386((ptr),		\
+				(unsigned long)(o), (unsigned long)(n),	\
+				sizeof(*(ptr)));			\
 	__ret;								\
 })
 #endif
@@ -301,10 +305,12 @@
 ({									\
 	__typeof__(*(ptr)) __ret;					\
 	if (likely(boot_cpu_data.x86 > 4))				\
-		__ret = __cmpxchg64((ptr), (unsigned long long)(o),	\
+		__ret = (__typeof__(*(ptr)))__cmpxchg64((ptr),		\
+				(unsigned long long)(o),		\
 				(unsigned long long)(n));		\
 	else								\
-		__ret = cmpxchg_486_u64((ptr), (unsigned long long)(o),	\
+		__ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr),	\
+				(unsigned long long)(o),		\
 				(unsigned long long)(n));		\
 	__ret;								\
 })
@@ -312,10 +318,12 @@
 ({									\
 	__typeof__(*(ptr)) __ret;					\
 	if (likely(boot_cpu_data.x86 > 4))				\
-		__ret = __cmpxchg64_local((ptr), (unsigned long long)(o), \
+		__ret = (__typeof__(*(ptr)))__cmpxchg64_local((ptr),	\
+				(unsigned long long)(o),		\
 				(unsigned long long)(n));		\
 	else								\
-		__ret = cmpxchg_486_u64((ptr), (unsigned long long)(o),	\
+		__ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr),	\
+				(unsigned long long)(o),		\
 				(unsigned long long)(n));		\
 	__ret;								\
 })
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index f1da7eb..e7207a6 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -28,6 +28,8 @@
 extern void register_bootmem_low_pages(unsigned long max_low_pfn);
 extern void add_memory_region(unsigned long long start,
 			      unsigned long long size, int type);
+extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type);
 extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
 extern void print_memory_map(char *who);
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index a560c4f..22ede73 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -18,6 +18,8 @@
 				    unsigned size, unsigned long align);
 extern void add_memory_region(unsigned long start, unsigned long size, 
 			      int type);
+extern void update_memory_range(u64 start, u64 size, unsigned old_type,
+				unsigned new_type);
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void); 
 extern unsigned long e820_end_of_ram(void);
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index 1cb7c51..a05b289 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -52,13 +52,13 @@
 
 struct page;
 
-static void inline clear_user_page(void *page, unsigned long vaddr,
+static inline void clear_user_page(void *page, unsigned long vaddr,
 				struct page *pg)
 {
 	clear_page(page);
 }
 
-static void inline copy_user_page(void *to, void *from, unsigned long vaddr,
+static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
 				struct page *topage)
 {
 	copy_page(to, from);
diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h
index 3746903..da8266a 100644
--- a/include/asm-x86/pci_64.h
+++ b/include/asm-x86/pci_64.h
@@ -25,6 +25,7 @@
 
 
 
+extern void dma32_reserve_bootmem(void);
 extern void pci_iommu_alloc(void);
 
 /* The PCI address space does equal the physical memory
diff --git a/include/asm-x86/sync_bitops.h b/include/asm-x86/sync_bitops.h
index cbce08a..6b775c9 100644
--- a/include/asm-x86/sync_bitops.h
+++ b/include/asm-x86/sync_bitops.h
@@ -23,10 +23,6 @@
  * This function is atomic and may not be reordered.  See __set_bit()
  * if you do not require the atomic guarantees.
  *
- * Note: there are no guarantees that this function will not be reordered
- * on non-x86 architectures, so if you are writing portable code,
- * make sure not to rely on its reordering guarantees.
- *
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
@@ -61,8 +57,7 @@
  * @nr: Bit to change
  * @addr: Address to start counting from
  *
- * change_bit() is atomic and may not be reordered. It may be
- * reordered on other architectures than x86.
+ * sync_change_bit() is atomic and may not be reordered.
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
@@ -80,7 +75,6 @@
  * @addr: Address to count from
  *
  * This operation is atomic and cannot be reordered.
- * It may be reordered on other architectures than x86.
  * It also implies a memory barrier.
  */
 static inline int sync_test_and_set_bit(int nr, volatile unsigned long * addr)
@@ -99,7 +93,6 @@
  * @addr: Address to count from
  *
  * This operation is atomic and cannot be reordered.
- * It can be reorderdered on other architectures other than x86.
  * It also implies a memory barrier.
  */
 static inline int sync_test_and_clear_bit(int nr, volatile unsigned long * addr)