Auto merge with /home/aegl/GIT/linus
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2e08942..cbb3e0c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -220,13 +220,6 @@
 	depends on !IA64_HP_SIM
 	default y
 
-config IA64_SGI_SN_SIM
-	bool "SGI Medusa Simulator Support"
-	depends on IA64_SGI_SN2 || IA64_GENERIC
-	help
-	  If you are compiling a kernel that will run under SGI's IA-64
-	  simulator (Medusa) then say Y, otherwise say N.
-
 config IA64_SGI_SN_XP
 	tristate "Support communication between SGI SSIs"
 	select IA64_UNCACHED_ALLOCATOR
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index c056139..04d0b00 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -81,7 +81,6 @@
 CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
 # CONFIG_IA64_CYCLONE is not set
 CONFIG_IOSAPIC=y
-CONFIG_IA64_SGI_SN_SIM=y
 CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=512
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 5c7c957..84f89da 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -20,6 +20,7 @@
  * 02/01/00 R.Seth	fixed get_cpuinfo for SMP
  * 01/07/99 S.Eranian	added the support for command line argument
  * 06/24/99 W.Drummond	added boot_cpu_data.
+ * 05/28/05 Z. Menyhart	Dynamic stride size for "flush_icache_range()"
  */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -85,6 +86,13 @@
 unsigned int num_io_spaces;
 
 /*
+ * "flush_icache_range()" needs to know what processor dependent stride size to use
+ * when it makes i-cache(s) coherent with d-caches.
+ */
+#define	I_CACHE_STRIDE_SHIFT	5	/* Safest way to go: 32 bytes by 32 bytes */
+unsigned long ia64_i_cache_stride_shift = ~0;
+
+/*
  * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
  * mask specifies a mask of address bits that must be 0 in order for two buffers to be
  * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
@@ -628,6 +636,12 @@
 	/* start_kernel() requires this... */
 }
 
+/*
+ * Calculate the max. cache line size.
+ *
+ * In addition, the minimum of the i-cache stride sizes is calculated for
+ * "flush_icache_range()".
+ */
 static void
 get_max_cacheline_size (void)
 {
@@ -641,6 +655,8 @@
                 printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
                        __FUNCTION__, status);
                 max = SMP_CACHE_BYTES;
+		/* Safest setup for "flush_icache_range()" */
+		ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
 		goto out;
         }
 
@@ -649,14 +665,31 @@
 						    &cci);
 		if (status != 0) {
 			printk(KERN_ERR
-			       "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
+			       "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
 			       __FUNCTION__, l, status);
 			max = SMP_CACHE_BYTES;
+			/* The safest setup for "flush_icache_range()" */
+			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			cci.pcci_unified = 1;
 		}
 		line_size = 1 << cci.pcci_line_size;
 		if (line_size > max)
 			max = line_size;
-        }
+		if (!cci.pcci_unified) {
+			status = ia64_pal_cache_config_info(l,
+						    /* cache_type (instruction)= */ 1,
+						    &cci);
+			if (status != 0) {
+				printk(KERN_ERR
+				"%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
+					__FUNCTION__, l, status);
+				/* The safest setup for "flush_icache_range()" */
+				cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+			}
+		}
+		if (cci.pcci_stride < ia64_i_cache_stride_shift)
+			ia64_i_cache_stride_shift = cci.pcci_stride;
+	}
   out:
 	if (max > ia64_max_cacheline_size)
 		ia64_max_cacheline_size = max;
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
index a1af914..3e2cfa2 100644
--- a/arch/ia64/lib/flush.S
+++ b/arch/ia64/lib/flush.S
@@ -3,37 +3,59 @@
  *
  * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 05/28/05 Zoltan Menyhart	Dynamic stride size
  */
+
 #include <asm/asmmacro.h>
-#include <asm/page.h>
+
 
 	/*
 	 * flush_icache_range(start,end)
-	 *	Must flush range from start to end-1 but nothing else (need to
+	 *
+	 *	Make i-cache(s) coherent with d-caches.
+	 *
+	 *	Must deal with range from start to end-1 but nothing else (need to
 	 *	be careful not to touch addresses that may be unmapped).
+	 *
+	 *	Note: "in0" and "in1" are preserved for debugging purposes.
 	 */
 GLOBAL_ENTRY(flush_icache_range)
+
 	.prologue
-	alloc r2=ar.pfs,2,0,0,0
-	sub r8=in1,in0,1
+	alloc	r2=ar.pfs,2,0,0,0
+	movl	r3=ia64_i_cache_stride_shift
+ 	mov	r21=1
 	;;
-	shr.u r8=r8,5			// we flush 32 bytes per iteration
-	.save ar.lc, r3
-	mov r3=ar.lc			// save ar.lc
+	ld8	r20=[r3]		// r20: stride shift
+	sub	r22=in1,r0,1		// last byte address
+	;;
+	shr.u	r23=in0,r20		// start / (stride size)
+	shr.u	r22=r22,r20		// (last byte address) / (stride size)
+	shl	r21=r21,r20		// r21: stride size of the i-cache(s)
+	;;
+	sub	r8=r22,r23		// number of strides - 1
+	shl	r24=r23,r20		// r24: addresses for "fc.i" =
+					//	"start" rounded down to stride boundary
+	.save	ar.lc,r3
+	mov	r3=ar.lc		// save ar.lc
 	;;
 
 	.body
-
-	mov ar.lc=r8
+	mov	ar.lc=r8
 	;;
-.Loop:	fc.i in0			// issuable on M2 only
-	add in0=32,in0
+	/*
+	 * 32 byte aligned loop, even number of (actually 2) bundles
+	 */
+.Loop:	fc.i	r24			// issuable on M0 only
+	add	r24=r21,r24		// we flush "stride size" bytes per iteration
+	nop.i	0
 	br.cloop.sptk.few .Loop
 	;;
 	sync.i
 	;;
 	srlz.i
 	;;
-	mov ar.lc=r3			// restore ar.lc
+	mov	ar.lc=r3		// restore ar.lc
 	br.ret.sptk.many rp
 END(flush_icache_range)
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 720a861..54d9ed4 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -157,6 +157,7 @@
 
 	memset(controller, 0, sizeof(*controller));
 	controller->segment = seg;
+	controller->node = -1;
 	return controller;
 }
 
@@ -288,6 +289,7 @@
 	unsigned int windows = 0;
 	struct pci_bus *pbus;
 	char *name;
+	int pxm;
 
 	controller = alloc_pci_controller(domain);
 	if (!controller)
@@ -295,10 +297,16 @@
 
 	controller->acpi_handle = device->handle;
 
+	pxm = acpi_get_pxm(controller->acpi_handle);
+#ifdef CONFIG_NUMA
+	if (pxm >= 0)
+		controller->node = pxm_to_nid_map[pxm];
+#endif
+
 	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
 			&windows);
-	controller->window = kmalloc(sizeof(*controller->window) * windows,
-			GFP_KERNEL);
+	controller->window = kmalloc_node(sizeof(*controller->window) * windows,
+			GFP_KERNEL, controller->node);
 	if (!controller->window)
 		goto out2;
 
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index a67f39e..a6649ba 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -61,7 +61,7 @@
 }
 
 static void *
-sn_default_pci_bus_fixup(struct pcibus_bussoft *soft)
+sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller)
 {
 	return NULL;
 }
@@ -362,7 +362,7 @@
 
 	provider_soft = NULL;
 	if (provider->bus_fixup)
-		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr);
+		provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller);
 
 	if (provider_soft == NULL)
 		return;		/* fixup failed or not applicable */
@@ -380,6 +380,22 @@
 	SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
 	    &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
 
+	/*
+	 * If the node information we obtained during the fixup phase is invalid
+	 * then set controller->node to -1 (undetermined)
+	 */
+	if (controller->node >= num_online_nodes()) {
+		struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus);
+
+		printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%lu"
+				    "L_IO=%lx L_MEM=%lx BASE=%lx\n",
+			b->bs_asic_type, b->bs_xid, b->bs_persist_busnum,
+			b->bs_legacy_io, b->bs_legacy_mem, b->bs_base);
+		printk(KERN_WARNING "on node %d but only %d nodes online."
+			"Association set to undetermined.\n",
+			controller->node, num_online_nodes());
+		controller->node = -1;
+	}
 	return;
 
 error_return:
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 6d02dac..94698be 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -72,7 +72,7 @@
 enum xpc_retval
 xpc_setup_infrastructure(struct xpc_partition *part)
 {
-	int ret;
+	int ret, cpuid;
 	struct timer_list *timer;
 	partid_t partid = XPC_PARTID(part);
 
@@ -223,9 +223,9 @@
 	xpc_vars_part[partid].openclose_args_pa =
 					__pa(part->local_openclose_args);
 	xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
-	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(smp_processor_id());
-	xpc_vars_part[partid].IPI_phys_cpuid =
-					cpu_physical_id(smp_processor_id());
+	cpuid = raw_smp_processor_id();	/* any CPU in this partition will do */
+	xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
+	xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
 	xpc_vars_part[partid].nchannels = part->nchannels;
 	xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
 
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index a2f7a88..0e4b9ad 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -79,6 +79,7 @@
 {
 	void *cpuaddr;
 	unsigned long phys_addr;
+	int node;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 
@@ -86,10 +87,19 @@
 
 	/*
 	 * Allocate the memory.
-	 * FIXME: We should be doing alloc_pages_node for the node closest
-	 *        to the PCI device.
 	 */
-	if (!(cpuaddr = (void *)__get_free_pages(GFP_ATOMIC, get_order(size))))
+	node = pcibus_to_node(pdev->bus);
+	if (likely(node >=0)) {
+		struct page *p = alloc_pages_node(node, GFP_ATOMIC, get_order(size));
+
+		if (likely(p))
+			cpuaddr = page_address(p);
+		else
+			return NULL;
+	} else
+		cpuaddr = (void *)__get_free_pages(GFP_ATOMIC, get_order(size));
+
+	if (unlikely(!cpuaddr))
 		return NULL;
 
 	memset(cpuaddr, 0x0, size);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 9813da5..b95e9286 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -85,7 +85,7 @@
 }
 
 void *
-pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft)
+pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
 {
 	int nasid, cnode, j;
 	struct hubdev_info *hubdev_info;
@@ -158,6 +158,14 @@
 	memset(soft->pbi_int_ate_resource.ate, 0,
  	       (soft->pbi_int_ate_size * sizeof(uint64_t)));
 
+	if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP)
+		/*
+		 * TIO PCI Bridge with no closest node information.
+		 * FIXME: Find another way to determine the closest node
+		 */
+		controller->node = -1;
+	else
+		controller->node = cnode;
 	return soft;
 }
 
diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c
index 51cc4e6..5d76a75 100644
--- a/arch/ia64/sn/pci/tioca_provider.c
+++ b/arch/ia64/sn/pci/tioca_provider.c
@@ -581,7 +581,7 @@
  * the caller.
  */
 static void *
-tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft)
+tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
 {
 	struct tioca_common *tioca_common;
 	struct tioca_kernel *tioca_kern;
@@ -646,6 +646,8 @@
 		       __FUNCTION__, SGI_TIOCA_ERROR,
 		       (int)tioca_common->ca_common.bs_persist_busnum);
 
+	/* Setup locality information */
+	controller->node = tioca_kern->ca_closest_node;
 	return tioca_common;
 }
 
diff --git a/drivers/firmware/pcdp.h b/drivers/firmware/pcdp.h
index e72cc47..ce910d6 100644
--- a/drivers/firmware/pcdp.h
+++ b/drivers/firmware/pcdp.h
@@ -52,6 +52,8 @@
 	u32				clock_rate;
 	u8				pci_prog_intfc;
 	u8				flags;
+	u16				conout_index;
+	u32				reserved;
 } __attribute__((packed));
 
 #define PCDP_IF_PCI	1
diff --git a/include/asm-ia64/pci.h b/include/asm-ia64/pci.h
index f11771e..dba9f22 100644
--- a/include/asm-ia64/pci.h
+++ b/include/asm-ia64/pci.h
@@ -128,6 +128,7 @@
 	void *acpi_handle;
 	void *iommu;
 	int segment;
+	int node;		/* nearest node with memory or -1 for global allocation */
 
 	unsigned int windows;
 	struct pci_window *window;
diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h
index f9b8d21..2b42d9e 100644
--- a/include/asm-ia64/sn/pcibr_provider.h
+++ b/include/asm-ia64/sn/pcibr_provider.h
@@ -128,7 +128,7 @@
 #define pcibr_unlock(pcibus_info, flag)  spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag)
 
 extern int  pcibr_init_provider(void);
-extern void *pcibr_bus_fixup(struct pcibus_bussoft *);
+extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *);
 extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t);
 extern dma_addr_t pcibr_dma_map_consistent(struct pci_dev *, unsigned long, size_t);
 extern void pcibr_dma_unmap(struct pci_dev *, dma_addr_t, int);
diff --git a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h
index 04e27d5..976f5ef 100644
--- a/include/asm-ia64/sn/pcibus_provider_defs.h
+++ b/include/asm-ia64/sn/pcibus_provider_defs.h
@@ -37,6 +37,7 @@
 	struct xwidget_info	*bs_xwidget_info;
 };
 
+struct pci_controller;
 /*
  * SN pci bus indirection
  */
@@ -45,7 +46,7 @@
 	dma_addr_t	(*dma_map)(struct pci_dev *, unsigned long, size_t);
 	dma_addr_t	(*dma_map_consistent)(struct pci_dev *, unsigned long, size_t);
 	void		(*dma_unmap)(struct pci_dev *, dma_addr_t, int);
-	void *		(*bus_fixup)(struct pcibus_bussoft *);
+	void *		(*bus_fixup)(struct pcibus_bussoft *, struct pci_controller *);
 };
 
 extern struct sn_pcibus_provider *sn_pci_provider[];
diff --git a/include/asm-ia64/sn/simulator.h b/include/asm-ia64/sn/simulator.h
index cf770e2..16a48b5 100644
--- a/include/asm-ia64/sn/simulator.h
+++ b/include/asm-ia64/sn/simulator.h
@@ -13,16 +13,9 @@
 #define SNMAGIC 0xaeeeeeee8badbeefL
 #define IS_MEDUSA()			({long sn; asm("mov %0=cpuid[%1]" : "=r"(sn) : "r"(2)); sn == SNMAGIC;})
 
-#ifdef CONFIG_IA64_SGI_SN_SIM
 #define SIMULATOR_SLEEP()		asm("nop.i 0x8beef")
-#define IS_RUNNING_ON_SIMULATOR() 	(sn_prom_type)
+#define IS_RUNNING_ON_SIMULATOR()	(sn_prom_type)
 #define IS_RUNNING_ON_FAKE_PROM()	(sn_prom_type == 2)
 extern int sn_prom_type;		/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
-#else
-#define IS_RUNNING_ON_SIMULATOR()	(0)
-#define IS_RUNNING_ON_FAKE_PROM()	(0)
-#define SIMULATOR_SLEEP()
-
-#endif
 
 #endif /* _ASM_IA64_SN_SIMULATOR_H */
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index 4e64c2a..399bc29 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -40,6 +40,11 @@
  */
 #define node_to_first_cpu(node) (__ffs(node_to_cpumask(node)))
 
+/*
+ * Determines the node for a given pci bus
+ */
+#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node
+
 void build_cpu_to_node_map(void);
 
 #define SD_CPU_INIT (struct sched_domain) {		\