Merge branch 'upstream-fixes'
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index d16520d..9ed449a 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -1338,8 +1338,7 @@
 	if (cpu == 0)
 		return -EBUSY;
 
-	/* We enable the timer again on the exit path of the death loop */
-	disable_APIC_timer();
+	clear_local_APIC();
 	/* Allow any queued timer interrupts to get serviced */
 	local_irq_enable();
 	mdelay(1);
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index 5d09de8..8498b5a 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -223,9 +223,15 @@
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
+/**
+ * iounmap - Free a IO remapping
+ * @addr: virtual address from ioremap_*
+ *
+ * Caller must ensure there is only one unmapping for the same pointer.
+ */
 void iounmap(volatile void __iomem *addr)
 {
-	struct vm_struct *p;
+	struct vm_struct *p, *o;
 
 	if ((void __force *)addr <= high_memory)
 		return;
@@ -239,22 +245,37 @@
 			addr < phys_to_virt(ISA_END_ADDRESS))
 		return;
 
-	write_lock(&vmlist_lock);
-	p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
-	if (!p) { 
-		printk(KERN_WARNING "iounmap: bad address %p\n", addr);
+	addr = (volatile void *)(PAGE_MASK & (unsigned long __force)addr);
+
+	/* Use the vm area unlocked, assuming the caller
+	   ensures there isn't another iounmap for the same address
+	   in parallel. Reuse of the virtual address is prevented by
+	   leaving it in the global lists until we're done with it.
+	   cpa takes care of the direct mappings. */
+	read_lock(&vmlist_lock);
+	for (p = vmlist; p; p = p->next) {
+		if (p->addr == addr)
+			break;
+	}
+	read_unlock(&vmlist_lock);
+
+	if (!p) {
+		printk("iounmap: bad address %p\n", addr);
 		dump_stack();
-		goto out_unlock;
+		return;
 	}
 
+	/* Reset the direct mapping. Can block */
 	if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 p->size >> PAGE_SHIFT,
 				 PAGE_KERNEL);
 		global_flush_tlb();
 	} 
-out_unlock:
-	write_unlock(&vmlist_lock);
+
+	/* Finally remove it */
+	o = remove_vm_area((void *)addr);
+	BUG_ON(p != o || o == NULL);
 	kfree(p); 
 }
 EXPORT_SYMBOL(iounmap);
diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c
index 94331d6..e3ac502 100644
--- a/arch/i386/pci/direct.c
+++ b/arch/i386/pci/direct.c
@@ -13,7 +13,7 @@
 #define PCI_CONF1_ADDRESS(bus, devfn, reg) \
 	(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
 
-static int pci_conf1_read(unsigned int seg, unsigned int bus,
+int pci_conf1_read(unsigned int seg, unsigned int bus,
 			  unsigned int devfn, int reg, int len, u32 *value)
 {
 	unsigned long flags;
@@ -42,7 +42,7 @@
 	return 0;
 }
 
-static int pci_conf1_write(unsigned int seg, unsigned int bus,
+int pci_conf1_write(unsigned int seg, unsigned int bus,
 			   unsigned int devfn, int reg, int len, u32 value)
 {
 	unsigned long flags;
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index dfbf80c..08a0849 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -19,21 +19,25 @@
 /* The base address of the last MMCONFIG device accessed */
 static u32 mmcfg_last_accessed_device;
 
+static DECLARE_BITMAP(fallback_slots, 32);
+
 /*
  * Functions for accessing PCI configuration space with MMCONFIG accesses
  */
-static u32 get_base_addr(unsigned int seg, int bus)
+static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
 {
 	int cfg_num = -1;
 	struct acpi_table_mcfg_config *cfg;
 
+	if (seg == 0 && bus == 0 &&
+	    test_bit(PCI_SLOT(devfn), fallback_slots))
+		return 0;
+
 	while (1) {
 		++cfg_num;
 		if (cfg_num >= pci_mmcfg_config_num) {
-			/* something bad is going on, no cfg table is found. */
-			/* so we fall back to the old way we used to do this */
-			/* and just rely on the first entry to be correct. */
-			return pci_mmcfg_config[0].base_address;
+			/* Not found - fallback to type 1 */
+			return 0;
 		}
 		cfg = &pci_mmcfg_config[cfg_num];
 		if (cfg->pci_segment_group_number != seg)
@@ -44,9 +48,9 @@
 	}
 }
 
-static inline void pci_exp_set_dev_base(unsigned int seg, int bus, int devfn)
+static inline void pci_exp_set_dev_base(unsigned int base, int bus, int devfn)
 {
-	u32 dev_base = get_base_addr(seg, bus) | (bus << 20) | (devfn << 12);
+	u32 dev_base = base | (bus << 20) | (devfn << 12);
 	if (dev_base != mmcfg_last_accessed_device) {
 		mmcfg_last_accessed_device = dev_base;
 		set_fixmap_nocache(FIX_PCIE_MCFG, dev_base);
@@ -57,13 +61,18 @@
 			  unsigned int devfn, int reg, int len, u32 *value)
 {
 	unsigned long flags;
+	u32 base;
 
 	if (!value || (bus > 255) || (devfn > 255) || (reg > 4095))
 		return -EINVAL;
 
+	base = get_base_addr(seg, bus, devfn);
+	if (!base)
+		return pci_conf1_read(seg,bus,devfn,reg,len,value);
+
 	spin_lock_irqsave(&pci_config_lock, flags);
 
-	pci_exp_set_dev_base(seg, bus, devfn);
+	pci_exp_set_dev_base(base, bus, devfn);
 
 	switch (len) {
 	case 1:
@@ -86,13 +95,18 @@
 			   unsigned int devfn, int reg, int len, u32 value)
 {
 	unsigned long flags;
+	u32 base;
 
 	if ((bus > 255) || (devfn > 255) || (reg > 4095)) 
 		return -EINVAL;
 
+	base = get_base_addr(seg, bus, devfn);
+	if (!base)
+		return pci_conf1_write(seg,bus,devfn,reg,len,value);
+
 	spin_lock_irqsave(&pci_config_lock, flags);
 
-	pci_exp_set_dev_base(seg, bus, devfn);
+	pci_exp_set_dev_base(base, bus, devfn);
 
 	switch (len) {
 	case 1:
@@ -116,6 +130,37 @@
 	.write =	pci_mmcfg_write,
 };
 
+/* K8 systems have some devices (typically in the builtin northbridge)
+   that are only accessible using type1
+   Normally this can be expressed in the MCFG by not listing them
+   and assigning suitable _SEGs, but this isn't implemented in some BIOS.
+   Instead try to discover all devices on bus 0 that are unreachable using MM
+   and fallback for them.
+   We only do this for bus 0/seg 0 */
+static __init void unreachable_devices(void)
+{
+	int i;
+	unsigned long flags;
+
+	for (i = 0; i < 32; i++) {
+		u32 val1;
+		u32 addr;
+
+		pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1);
+		if (val1 == 0xffffffff)
+			continue;
+
+		/* Locking probably not needed, but safer */
+		spin_lock_irqsave(&pci_config_lock, flags);
+		addr = get_base_addr(0, 0, PCI_DEVFN(i, 0));
+		if (addr != 0)
+			pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0));
+		if (addr == 0 || readl((u32 *)addr) != val1)
+			set_bit(i, fallback_slots);
+		spin_unlock_irqrestore(&pci_config_lock, flags);
+	}
+}
+
 static int __init pci_mmcfg_init(void)
 {
 	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
@@ -131,6 +176,8 @@
 	raw_pci_ops = &pci_mmcfg;
 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
 
+	unreachable_devices();
+
  out:
 	return 0;
 }
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index 127d53a..f550781 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -74,3 +74,10 @@
 
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
+
+extern int pci_conf1_write(unsigned int seg, unsigned int bus,
+			   unsigned int devfn, int reg, int len, u32 value);
+extern int pci_conf1_read(unsigned int seg, unsigned int bus,
+			  unsigned int devfn, int reg, int len, u32 *value);
+
+
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 830feb2..2b760d0 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -217,8 +217,7 @@
 	if (!tsk_used_math(tsk))
 		return 0;
 	if (!regs)
-		regs = (struct pt_regs *)tsk->thread.rsp0;
-	--regs;
+		regs = ((struct pt_regs *)tsk->thread.rsp0) - 1;
 	if (tsk == current)
 		unlazy_fpu(tsk);
 	set_fs(KERNEL_DS); 
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 683c33f..ecbd7b8 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -1181,7 +1181,7 @@
 	if (cpu == 0)
 		return -EBUSY;
 
-	disable_APIC_timer();
+	clear_local_APIC();
 
 	/*
 	 * HACK:
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index fdaddc4..7410279 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -59,7 +59,7 @@
 unsigned int cpu_khz;					/* TSC clocks / usec, not used here */
 static unsigned long hpet_period;			/* fsecs / HPET clock */
 unsigned long hpet_tick;				/* HPET clocks / interrupt */
-static int hpet_use_timer;
+static int hpet_use_timer;				/* Use counter of hpet for time keeping, otherwise PIT */
 unsigned long vxtime_hz = PIT_TICK_RATE;
 int report_lost_ticks;				/* command line option */
 unsigned long long monotonic_base;
@@ -908,12 +908,14 @@
 	if (!hpet_init())
                 vxtime_hz = (1000000000000000L + hpet_period / 2) /
 			hpet_period;
+	else
+		vxtime.hpet_address = 0;
 
 	if (hpet_use_timer) {
 		cpu_khz = hpet_calibrate_tsc();
 		timename = "HPET";
 #ifdef CONFIG_X86_PM_TIMER
-	} else if (pmtmr_ioport) {
+	} else if (pmtmr_ioport && !vxtime.hpet_address) {
 		vxtime_hz = PM_TIMER_FREQUENCY;
 		timename = "PM";
 		pit_init();
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index ecf7acb..0d260e4 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -247,9 +247,15 @@
 	return __ioremap(phys_addr, size, _PAGE_PCD);
 }
 
+/**
+ * iounmap - Free a IO remapping
+ * @addr: virtual address from ioremap_*
+ *
+ * Caller must ensure there is only one unmapping for the same pointer.
+ */
 void iounmap(volatile void __iomem *addr)
 {
-	struct vm_struct *p;
+	struct vm_struct *p, *o;
 
 	if (addr <= high_memory) 
 		return; 
@@ -257,12 +263,31 @@
 		addr < phys_to_virt(ISA_END_ADDRESS))
 		return;
 
-	write_lock(&vmlist_lock);
-	p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK));
-	if (!p)
+	addr = (volatile void *)(PAGE_MASK & (unsigned long __force)addr);
+	/* Use the vm area unlocked, assuming the caller
+	   ensures there isn't another iounmap for the same address
+	   in parallel. Reuse of the virtual address is prevented by
+	   leaving it in the global lists until we're done with it.
+	   cpa takes care of the direct mappings. */
+	read_lock(&vmlist_lock);
+	for (p = vmlist; p; p = p->next) {
+		if (p->addr == addr)
+			break;
+	}
+	read_unlock(&vmlist_lock);
+
+	if (!p) {
 		printk("iounmap: bad address %p\n", addr);
-	else if (p->flags >> 20)
+		dump_stack();
+		return;
+	}
+
+	/* Reset the direct mapping. Can block */
+	if (p->flags >> 20)
 		ioremap_change_attr(p->phys_addr, p->size, 0);
-	write_unlock(&vmlist_lock);
+
+	/* Finally remove it */
+	o = remove_vm_area((void *)addr);
+	BUG_ON(p != o || o == NULL);
 	kfree(p); 
 }
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index a828a01..15b67d2 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -53,6 +53,8 @@
 	int res = -1;
 	unsigned long addr, end;
 
+	if (shift >= 64)
+		return -1;
 	memset(memnodemap, 0xff, sizeof(memnodemap));
 	for (i = 0; i < numnodes; i++) {
 		addr = nodes[i].start;
@@ -65,7 +67,7 @@
 			if (memnodemap[addr >> shift] != 0xff)
 				return -1;
 			memnodemap[addr >> shift] = i;
-			addr += (1 << shift);
+                       addr += (1UL << shift);
 		} while (addr < end);
 		res = 1;
 	} 
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index a0838c4..9c4f907 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -8,10 +8,13 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/bitmap.h>
 #include "pci.h"
 
 #define MMCONFIG_APER_SIZE (256*1024*1024)
 
+static DECLARE_BITMAP(fallback_slots, 32);
+
 /* Static virtual mapping of the MMCONFIG aperture */
 struct mmcfg_virt {
 	struct acpi_table_mcfg_config *cfg;
@@ -19,7 +22,7 @@
 };
 static struct mmcfg_virt *pci_mmcfg_virt;
 
-static char *get_virt(unsigned int seg, int bus)
+static char *get_virt(unsigned int seg, unsigned bus)
 {
 	int cfg_num = -1;
 	struct acpi_table_mcfg_config *cfg;
@@ -27,10 +30,9 @@
 	while (1) {
 		++cfg_num;
 		if (cfg_num >= pci_mmcfg_config_num) {
-			/* something bad is going on, no cfg table is found. */
-			/* so we fall back to the old way we used to do this */
-			/* and just rely on the first entry to be correct. */
-			return pci_mmcfg_virt[0].virt;
+			/* Not found - fall back to type 1. This happens
+			   e.g. on the internal devices of a K8 northbridge. */
+			return NULL;
 		}
 		cfg = pci_mmcfg_virt[cfg_num].cfg;
 		if (cfg->pci_segment_group_number != seg)
@@ -41,20 +43,30 @@
 	}
 }
 
-static inline char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
+static char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
 {
-
-	return get_virt(seg, bus) + ((bus << 20) | (devfn << 12));
+	char *addr;
+	if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots))
+		return NULL;
+	addr = get_virt(seg, bus);
+	if (!addr)
+		return NULL;
+ 	return addr + ((bus << 20) | (devfn << 12));
 }
 
 static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
 			  unsigned int devfn, int reg, int len, u32 *value)
 {
-	char *addr = pci_dev_base(seg, bus, devfn);
+	char *addr;
 
+	/* Why do we have this when nobody checks it. How about a BUG()!? -AK */
 	if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095)))
 		return -EINVAL;
 
+	addr = pci_dev_base(seg, bus, devfn);
+	if (!addr)
+		return pci_conf1_read(seg,bus,devfn,reg,len,value);
+
 	switch (len) {
 	case 1:
 		*value = readb(addr + reg);
@@ -73,11 +85,16 @@
 static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
 			   unsigned int devfn, int reg, int len, u32 value)
 {
-	char *addr = pci_dev_base(seg, bus, devfn);
+	char *addr;
 
+	/* Why do we have this when nobody checks it. How about a BUG()!? -AK */
 	if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
 		return -EINVAL;
 
+	addr = pci_dev_base(seg, bus, devfn);
+	if (!addr)
+		return pci_conf1_write(seg,bus,devfn,reg,len,value);
+
 	switch (len) {
 	case 1:
 		writeb(value, addr + reg);
@@ -98,6 +115,30 @@
 	.write =	pci_mmcfg_write,
 };
 
+/* K8 systems have some devices (typically in the builtin northbridge)
+   that are only accessible using type1
+   Normally this can be expressed in the MCFG by not listing them
+   and assigning suitable _SEGs, but this isn't implemented in some BIOS.
+   Instead try to discover all devices on bus 0 that are unreachable using MM
+   and fallback for them.
+   We only do this for bus 0/seg 0 */
+static __init void unreachable_devices(void)
+{
+	int i;
+	for (i = 0; i < 32; i++) {
+		u32 val1;
+		char *addr;
+
+		pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1);
+		if (val1 == 0xffffffff)
+			continue;
+		addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0));
+		if (addr == NULL|| readl(addr) != val1) {
+			set_bit(i, &fallback_slots);
+		}
+	}
+}
+
 static int __init pci_mmcfg_init(void)
 {
 	int i;
@@ -128,6 +169,8 @@
 		printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address);
 	}
 
+	unreachable_devices();
+
 	raw_pci_ops = &pci_mmcfg;
 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
 
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e34104d..c3441b3 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1464,8 +1464,10 @@
 			request_queue_t *q = disk->queue;
 			if (disk->flags & GENHD_FL_UP)
 				del_gendisk(disk);
-			if (q)	
+			if (q) {
 				blk_cleanup_queue(q);
+				drv->queue = NULL;
+			}
 		}
 	}
 
diff --git a/drivers/char/drm/radeon_cp.c b/drivers/char/drm/radeon_cp.c
index 03839ea..9f2b4ef 100644
--- a/drivers/char/drm/radeon_cp.c
+++ b/drivers/char/drm/radeon_cp.c
@@ -1522,7 +1522,7 @@
 
 	dev_priv->gart_size = init->gart_size;
 	dev_priv->gart_vm_start = dev_priv->fb_location
-	    + RADEON_READ(RADEON_CONFIG_APER_SIZE);
+	    + RADEON_READ(RADEON_CONFIG_APER_SIZE) * 2;
 
 #if __OS_HAS_AGP
 	if (!dev_priv->is_pci)
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 8b6e2a1..00d6830 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2280,7 +2280,7 @@
  	}
 
 	if (unlikely(skge->tx_avail < skb_shinfo(skb)->nr_frags +1)) {
-		if (!netif_stopped(dev)) {
+		if (!netif_queue_stopped(dev)) {
 			netif_stop_queue(dev);
 
 			printk(KERN_WARNING PFX "%s: ring full when queue awake!\n",
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 665ae79..d0a0fdb 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -2443,7 +2443,7 @@
 			struct scatterlist *psg = &qc->pad_sgent;
 			void *addr = kmap_atomic(psg->page, KM_IRQ0);
 			memcpy(addr + psg->offset, pad_buf, qc->pad_len);
-			kunmap_atomic(psg->page, KM_IRQ0);
+			kunmap_atomic(addr, KM_IRQ0);
 		}
 	} else {
 		if (sg_dma_len(&sg[0]) > 0)
@@ -2717,7 +2717,7 @@
 		if (qc->tf.flags & ATA_TFLAG_WRITE) {
 			void *addr = kmap_atomic(psg->page, KM_IRQ0);
 			memcpy(pad_buf, addr + psg->offset, qc->pad_len);
-			kunmap_atomic(psg->page, KM_IRQ0);
+			kunmap_atomic(addr, KM_IRQ0);
 		}
 
 		sg_dma_address(psg) = ap->pad_dma + (qc->tag * ATA_DMA_PAD_SZ);
diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c
index cdc7157..7441532 100644
--- a/drivers/video/cfbcopyarea.c
+++ b/drivers/video/cfbcopyarea.c
@@ -64,8 +64,8 @@
 	int const shift = dst_idx-src_idx;
 	int left, right;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (!shift) {
 		// Same alignment for source and dest
@@ -216,8 +216,8 @@
 
 	shift = dst_idx-src_idx;
 
-	first = ~0UL << (bits - 1 - dst_idx);
-	last = ~(~0UL << (bits - 1 - ((dst_idx-n) % bits)));
+	first = FB_SHIFT_LOW(~0UL, bits - 1 - dst_idx);
+	last = ~(FB_SHIFT_LOW(~0UL, bits - 1 - ((dst_idx-n) % bits)));
 
 	if (!shift) {
 		// Same alignment for source and dest
diff --git a/drivers/video/cfbfillrect.c b/drivers/video/cfbfillrect.c
index 167d931..e5ff62e 100644
--- a/drivers/video/cfbfillrect.c
+++ b/drivers/video/cfbfillrect.c
@@ -110,8 +110,8 @@
 	if (!n)
 		return;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (dst_idx+n <= bits) {
 		// Single word
@@ -167,8 +167,8 @@
 	if (!n)
 		return;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (dst_idx+n <= bits) {
 		// Single word
@@ -221,8 +221,8 @@
 	if (!n)
 		return;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (dst_idx+n <= bits) {
 		// Single word
@@ -290,8 +290,8 @@
 	if (!n)
 		return;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (dst_idx+n <= bits) {
 		// Single word
diff --git a/drivers/video/cfbimgblt.c b/drivers/video/cfbimgblt.c
index a7770c4..910e233 100644
--- a/drivers/video/cfbimgblt.c
+++ b/drivers/video/cfbimgblt.c
@@ -76,18 +76,6 @@
 #define FB_WRITEL fb_writel
 #define FB_READL  fb_readl
 
-#if defined (__BIG_ENDIAN)
-#define LEFT_POS(bpp)          (32 - bpp)
-#define SHIFT_HIGH(val, bits)  ((val) >> (bits))
-#define SHIFT_LOW(val, bits)   ((val) << (bits))
-#define BIT_NR(b)              (7 - (b))
-#else
-#define LEFT_POS(bpp)          (0)
-#define SHIFT_HIGH(val, bits)  ((val) << (bits))
-#define SHIFT_LOW(val, bits)   ((val) >> (bits))
-#define BIT_NR(b)              (b)
-#endif
-
 static inline void color_imageblit(const struct fb_image *image, 
 				   struct fb_info *p, u8 __iomem *dst1, 
 				   u32 start_index,
@@ -109,7 +97,7 @@
 		val = 0;
 		
 		if (start_index) {
-			u32 start_mask = ~(SHIFT_HIGH(~(u32)0, start_index));
+			u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0, start_index));
 			val = FB_READL(dst) & start_mask;
 			shift = start_index;
 		}
@@ -119,20 +107,20 @@
 				color = palette[*src];
 			else
 				color = *src;
-			color <<= LEFT_POS(bpp);
-			val |= SHIFT_HIGH(color, shift);
+			color <<= FB_LEFT_POS(bpp);
+			val |= FB_SHIFT_HIGH(color, shift);
 			if (shift >= null_bits) {
 				FB_WRITEL(val, dst++);
 	
 				val = (shift == null_bits) ? 0 : 
-					SHIFT_LOW(color, 32 - shift);
+					FB_SHIFT_LOW(color, 32 - shift);
 			}
 			shift += bpp;
 			shift &= (32 - 1);
 			src++;
 		}
 		if (shift) {
-			u32 end_mask = SHIFT_HIGH(~(u32)0, shift);
+			u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
 
 			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
 		}
@@ -162,6 +150,8 @@
 	u32 i, j, l;
 	
 	dst2 = (u32 __iomem *) dst1;
+	fgcolor <<= FB_LEFT_POS(bpp);
+	bgcolor <<= FB_LEFT_POS(bpp);
 
 	for (i = image->height; i--; ) {
 		shift = val = 0;
@@ -172,22 +162,21 @@
 
 		/* write leading bits */
 		if (start_index) {
-			u32 start_mask = ~(SHIFT_HIGH(~(u32)0, start_index));
+			u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0,start_index));
 			val = FB_READL(dst) & start_mask;
 			shift = start_index;
 		}
 
 		while (j--) {
 			l--;
-			color = (*s & 1 << (BIT_NR(l))) ? fgcolor : bgcolor;
-			color <<= LEFT_POS(bpp);
-			val |= SHIFT_HIGH(color, shift);
+			color = (*s & 1 << (FB_BIT_NR(l))) ? fgcolor : bgcolor;
+			val |= FB_SHIFT_HIGH(color, shift);
 			
 			/* Did the bitshift spill bits to the next long? */
 			if (shift >= null_bits) {
 				FB_WRITEL(val, dst++);
 				val = (shift == null_bits) ? 0 :
-					 SHIFT_LOW(color,32 - shift);
+					FB_SHIFT_LOW(color,32 - shift);
 			}
 			shift += bpp;
 			shift &= (32 - 1);
@@ -196,7 +185,7 @@
 
 		/* write trailing bits */
  		if (shift) {
-			u32 end_mask = SHIFT_HIGH(~(u32)0, shift);
+			u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
 
 			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
 		}
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index bcea87c..3660e51 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -2048,7 +2048,7 @@
 	struct fbcon_ops *ops;
 	struct display *p = &fb_display[vc->vc_num];
 	struct fb_var_screeninfo var;
-	int i, prev_console;
+	int i, prev_console, charcnt = 256;
 
 	info = registered_fb[con2fb_map[vc->vc_num]];
 	ops = info->fbcon_par;
@@ -2103,7 +2103,8 @@
 	fb_set_var(info, &var);
 	ops->var = info->var;
 
-	if (old_info != NULL && old_info != info) {
+	if (old_info != NULL && (old_info != info ||
+				 info->flags & FBINFO_MISC_ALWAYS_SETPAR)) {
 		if (info->fbops->fb_set_par)
 			info->fbops->fb_set_par(info);
 		fbcon_del_cursor_timer(old_info);
@@ -2120,6 +2121,13 @@
 
 	vc->vc_can_do_color = (fb_get_color_depth(&info->var, &info->fix)!=1);
 	vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
+
+	if (p->userfont)
+		charcnt = FNTCHARCNT(vc->vc_font.data);
+
+	if (charcnt > 256)
+		vc->vc_complement_mask <<= 1;
+
 	updatescrollmode(p, info, vc);
 
 	switch (p->scrollmode) {
@@ -2139,8 +2147,12 @@
 
 	scrollback_max = 0;
 	scrollback_current = 0;
-	ops->var.xoffset = ops->var.yoffset = p->yscroll = 0;
-	ops->update_start(info);
+
+	if (!fbcon_is_inactive(vc, info)) {
+	    ops->var.xoffset = ops->var.yoffset = p->yscroll = 0;
+	    ops->update_start(info);
+	}
+
 	fbcon_set_palette(vc, color_table); 	
 	fbcon_clear_margins(vc, 0);
 
@@ -2184,11 +2196,14 @@
 		ops->graphics = 1;
 
 		if (!blank) {
+			if (info->fbops->fb_save_state)
+				info->fbops->fb_save_state(info);
 			var.activate = FB_ACTIVATE_NOW | FB_ACTIVATE_FORCE;
 			fb_set_var(info, &var);
 			ops->graphics = 0;
 			ops->var = info->var;
-		}
+		} else if (info->fbops->fb_restore_state)
+			info->fbops->fb_restore_state(info);
 	}
 
  	if (!fbcon_is_inactive(vc, info)) {
@@ -2736,8 +2751,12 @@
 		updatescrollmode(p, info, vc);
 		scrollback_max = 0;
 		scrollback_current = 0;
-		ops->var.xoffset = ops->var.yoffset = p->yscroll = 0;
-		ops->update_start(info);
+
+		if (!fbcon_is_inactive(vc, info)) {
+		    ops->var.xoffset = ops->var.yoffset = p->yscroll = 0;
+		    ops->update_start(info);
+		}
+
 		fbcon_set_palette(vc, color_table);
 		update_screen(vc);
 		if (softback_buf)
@@ -2774,8 +2793,13 @@
 			updatescrollmode(p, info, vc);
 			scrollback_max = 0;
 			scrollback_current = 0;
-			ops->var.xoffset = ops->var.yoffset = p->yscroll = 0;
-			ops->update_start(info);
+
+			if (!fbcon_is_inactive(vc, info)) {
+			    ops->var.xoffset = ops->var.yoffset =
+				p->yscroll = 0;
+			    ops->update_start(info);
+			}
+
 			fbcon_set_palette(vc, color_table);
 			update_screen(vc);
 			if (softback_buf)
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 6240aed..10dfdf0 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -722,14 +722,30 @@
 int
 fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var)
 {
+	struct fb_fix_screeninfo *fix = &info->fix;
         int xoffset = var->xoffset;
         int yoffset = var->yoffset;
-        int err;
+        int err = 0, yres = info->var.yres;
 
-        if (xoffset < 0 || yoffset < 0 || !info->fbops->fb_pan_display ||
-            xoffset + info->var.xres > info->var.xres_virtual ||
-            yoffset + info->var.yres > info->var.yres_virtual)
-                return -EINVAL;
+	if (var->yoffset > 0) {
+		if (var->vmode & FB_VMODE_YWRAP) {
+			if (!fix->ywrapstep || (var->yoffset % fix->ywrapstep))
+				err = -EINVAL;
+			else
+				yres = 0;
+		} else if (!fix->ypanstep || (var->yoffset % fix->ypanstep))
+			err = -EINVAL;
+	}
+
+	if (var->xoffset > 0 && (!fix->xpanstep ||
+				 (var->xoffset % fix->xpanstep)))
+		err = -EINVAL;
+
+        if (err || !info->fbops->fb_pan_display || xoffset < 0 ||
+	    yoffset < 0 || var->yoffset + yres > info->var.yres_virtual ||
+	    var->xoffset + info->var.xres > info->var.xres_virtual)
+		return -EINVAL;
+
 	if ((err = info->fbops->fb_pan_display(var, info)))
 		return err;
         info->var.xoffset = var->xoffset;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 04a58f3..a973be2 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -617,6 +617,12 @@
 
 	/* perform fb specific mmap */
 	int (*fb_mmap)(struct fb_info *info, struct file *file, struct vm_area_struct *vma);
+
+	/* save current hardware state */
+	void (*fb_save_state)(struct fb_info *info);
+
+	/* restore saved state */
+	void (*fb_restore_state)(struct fb_info *info);
 };
 
 #ifdef CONFIG_FB_TILEBLITTING
@@ -726,6 +732,18 @@
 						  from userspace */
 #define FBINFO_MISC_TILEBLITTING       0x20000 /* use tile blitting */
 
+/* A driver may set this flag to indicate that it does want a set_par to be
+ * called every time when fbcon_switch is executed. The advantage is that with
+ * this flag set you can really be shure that set_par is always called before
+ * any of the functions dependant on the correct hardware state or altering
+ * that state, even if you are using some broken X releases. The disadvantage
+ * is that it introduces unwanted delays to every console switch if set_par
+ * is slow. It is a good idea to try this flag in the drivers initialization
+ * code whenever there is a bug report related to switching between X and the
+ * framebuffer console.
+ */
+#define FBINFO_MISC_ALWAYS_SETPAR   0x40000
+
 struct fb_info {
 	int node;
 	int flags;
@@ -817,6 +835,18 @@
 
 #endif
 
+#if defined (__BIG_ENDIAN)
+#define FB_LEFT_POS(bpp)          (32 - bpp)
+#define FB_SHIFT_HIGH(val, bits)  ((val) >> (bits))
+#define FB_SHIFT_LOW(val, bits)   ((val) << (bits))
+#define FB_BIT_NR(b)              (7 - (b))
+#else
+#define FB_LEFT_POS(bpp)          (0)
+#define FB_SHIFT_HIGH(val, bits)  ((val) << (bits))
+#define FB_SHIFT_LOW(val, bits)   ((val) >> (bits))
+#define FB_BIT_NR(b)              (b)
+#endif
+
     /*
      *  `Generic' versions of the frame buffer device operations
      */