[PATCH] ppc64: support 64k pages
Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel
base page size to 64K. The resulting kernel still boots on any
hardware. On current machines with 4K pages support only, the kernel
will maintain 16 "subpages" for each 64K page transparently.
Note that while real 64K capable HW has been tested, the current patch
will not enable it yet as such hardware is not released yet, and I'm
still verifying with the firmware architects the proper to get the
information from the newer hypervisors.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c
index b3c6c33..30bdcf3 100644
--- a/arch/powerpc/platforms/iseries/htab.c
+++ b/arch/powerpc/platforms/iseries/htab.c
@@ -39,15 +39,16 @@
spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]);
}
-static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
- unsigned long arpn;
long slot;
hpte_t lhpte;
int secondary = 0;
+ BUG_ON(psize != MMU_PAGE_4K);
+
/*
* The hypervisor tries both primary and secondary.
* If we are being called to insert in the secondary,
@@ -59,8 +60,19 @@
iSeries_hlock(hpte_group);
- slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
- BUG_ON(lhpte.v & HPTE_V_VALID);
+ slot = HvCallHpt_findValid(&lhpte, va >> HW_PAGE_SHIFT);
+ if (unlikely(lhpte.v & HPTE_V_VALID)) {
+ if (vflags & HPTE_V_BOLTED) {
+ HvCallHpt_setSwBits(slot, 0x10, 0);
+ HvCallHpt_setPp(slot, PP_RWXX);
+ iSeries_hunlock(hpte_group);
+ if (slot < 0)
+ return 0x8 | (slot & 7);
+ else
+ return slot & 7;
+ }
+ BUG();
+ }
if (slot == -1) { /* No available entry found in either group */
iSeries_hunlock(hpte_group);
@@ -73,10 +85,9 @@
slot &= 0x7fffffffffffffff;
}
- arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
- lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+ lhpte.v = hpte_encode_v(va, MMU_PAGE_4K) | vflags | HPTE_V_VALID;
+ lhpte.r = hpte_encode_r(phys_to_abs(pa), MMU_PAGE_4K) | rflags;
/* Now fill in the actual HPTE */
HvCallHpt_addValidate(slot, secondary, &lhpte);
@@ -86,25 +97,6 @@
return (secondary << 3) | (slot & 7);
}
-long iSeries_hpte_bolt_or_insert(unsigned long hpte_group,
- unsigned long va, unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
-{
- long slot;
- hpte_t lhpte;
-
- slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT);
-
- if (lhpte.v & HPTE_V_VALID) {
- /* Bolt the existing HPTE */
- HvCallHpt_setSwBits(slot, 0x10, 0);
- HvCallHpt_setPp(slot, PP_RWXX);
- return 0;
- }
-
- return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags);
-}
-
static unsigned long iSeries_hpte_getword0(unsigned long slot)
{
hpte_t hpte;
@@ -150,15 +142,17 @@
* bits 61..63 : PP2,PP1,PP0
*/
static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
+ unsigned long va, int psize, int local)
{
hpte_t hpte;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
iSeries_hlock(slot);
HvCallHpt_get(&hpte, slot);
- if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) {
+ want_v = hpte_encode_v(va, MMU_PAGE_4K);
+
+ if (HPTE_V_COMPARE(hpte.v, want_v) && (hpte.v & HPTE_V_VALID)) {
/*
* Hypervisor expects bits as NPPP, which is
* different from how they are mapped in our PP.
@@ -210,14 +204,17 @@
*
* No need to lock here because we should be the only user.
*/
-static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
unsigned long vsid,va,vpn;
long slot;
+ BUG_ON(psize != MMU_PAGE_4K);
+
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
+ vpn = va >> HW_PAGE_SHIFT;
slot = iSeries_hpte_find(vpn);
if (slot == -1)
panic("updateboltedpp: Could not find page to bolt\n");
@@ -225,7 +222,7 @@
}
static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
unsigned long hpte_v;
unsigned long avpn = va >> 23;
diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c
index 62ec734..f476d71 100644
--- a/arch/powerpc/platforms/iseries/hvlog.c
+++ b/arch/powerpc/platforms/iseries/hvlog.c
@@ -22,7 +22,7 @@
while (len) {
hv_buf.addr = cur;
- left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur;
+ left_this_page = ((cur & HW_PAGE_MASK) + HW_PAGE_SIZE) - cur;
if (left_this_page > len)
left_this_page = len;
hv_buf.len = left_this_page;
@@ -30,6 +30,6 @@
HvCall2(HvCallBaseWriteLogBuffer,
virt_to_abs(&hv_buf),
left_this_page);
- cur = (cur & PAGE_MASK) + PAGE_SIZE;
+ cur = (cur & HW_PAGE_MASK) + HW_PAGE_SIZE;
}
}
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 1a6845b..bf081b3 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -43,9 +43,12 @@
u64 rc;
union tce_entry tce;
+ index <<= TCE_PAGE_FACTOR;
+ npages <<= TCE_PAGE_FACTOR;
+
while (npages--) {
tce.te_word = 0;
- tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT;
+ tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> TCE_SHIFT;
if (tbl->it_type == TCE_VB) {
/* Virtual Bus */
@@ -66,7 +69,7 @@
panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n",
rc);
index++;
- uaddr += PAGE_SIZE;
+ uaddr += TCE_PAGE_SIZE;
}
}
@@ -74,6 +77,9 @@
{
u64 rc;
+ npages <<= TCE_PAGE_FACTOR;
+ index <<= TCE_PAGE_FACTOR;
+
while (npages--) {
rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0);
if (rc)
@@ -83,6 +89,50 @@
}
}
+/*
+ * Call Hv with the architected data structure to get TCE table info.
+ * info. Put the returned data into the Linux representation of the
+ * TCE table data.
+ * The Hardware Tce table comes in three flavors.
+ * 1. TCE table shared between Buses.
+ * 2. TCE table per Bus.
+ * 3. TCE Table per IOA.
+ */
+void iommu_table_getparms_iSeries(unsigned long busno,
+ unsigned char slotno,
+ unsigned char virtbus,
+ struct iommu_table* tbl)
+{
+ struct iommu_table_cb *parms;
+
+ parms = kmalloc(sizeof(*parms), GFP_KERNEL);
+ if (parms == NULL)
+ panic("PCI_DMA: TCE Table Allocation failed.");
+
+ memset(parms, 0, sizeof(*parms));
+
+ parms->itc_busno = busno;
+ parms->itc_slotno = slotno;
+ parms->itc_virtbus = virtbus;
+
+ HvCallXm_getTceTableParms(iseries_hv_addr(parms));
+
+ if (parms->itc_size == 0)
+ panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
+
+ /* itc_size is in pages worth of table, it_size is in # of entries */
+ tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) /
+ sizeof(union tce_entry)) >> TCE_PAGE_FACTOR;
+ tbl->it_busno = parms->itc_busno;
+ tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR;
+ tbl->it_index = parms->itc_index;
+ tbl->it_blocksize = 1;
+ tbl->it_type = virtbus ? TCE_VB : TCE_PCI;
+
+ kfree(parms);
+}
+
+
#ifdef CONFIG_PCI
/*
* This function compares the known tables to find an iommu_table
@@ -104,46 +154,6 @@
return NULL;
}
-/*
- * Call Hv with the architected data structure to get TCE table info.
- * info. Put the returned data into the Linux representation of the
- * TCE table data.
- * The Hardware Tce table comes in three flavors.
- * 1. TCE table shared between Buses.
- * 2. TCE table per Bus.
- * 3. TCE Table per IOA.
- */
-static void iommu_table_getparms(struct pci_dn *pdn,
- struct iommu_table* tbl)
-{
- struct iommu_table_cb *parms;
-
- parms = kmalloc(sizeof(*parms), GFP_KERNEL);
- if (parms == NULL)
- panic("PCI_DMA: TCE Table Allocation failed.");
-
- memset(parms, 0, sizeof(*parms));
-
- parms->itc_busno = pdn->busno;
- parms->itc_slotno = pdn->LogicalSlot;
- parms->itc_virtbus = 0;
-
- HvCallXm_getTceTableParms(iseries_hv_addr(parms));
-
- if (parms->itc_size == 0)
- panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms);
-
- /* itc_size is in pages worth of table, it_size is in # of entries */
- tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry);
- tbl->it_busno = parms->itc_busno;
- tbl->it_offset = parms->itc_offset;
- tbl->it_index = parms->itc_index;
- tbl->it_blocksize = 1;
- tbl->it_type = TCE_PCI;
-
- kfree(parms);
-}
-
void iommu_devnode_init_iSeries(struct device_node *dn)
{
@@ -152,7 +162,7 @@
tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
- iommu_table_getparms(pdn, tbl);
+ iommu_table_getparms_iSeries(pdn->busno, pdn->LogicalSlot, 0, tbl);
/* Look for existing tce table */
pdn->iommu_table = iommu_table_find(tbl);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index fda712b..c5207064 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -320,11 +320,11 @@
*/
if (naca.xRamDisk) {
initrd_start = (unsigned long)__va(naca.xRamDisk);
- initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE;
+ initrd_end = initrd_start + naca.xRamDiskSize * HW_PAGE_SIZE;
initrd_below_start_ok = 1; // ramdisk in kernel space
ROOT_DEV = Root_RAM0;
- if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize)
- rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024;
+ if (((rd_size * 1024) / HW_PAGE_SIZE) < naca.xRamDiskSize)
+ rd_size = (naca.xRamDiskSize * HW_PAGE_SIZE) / 1024;
} else
#endif /* CONFIG_BLK_DEV_INITRD */
{
@@ -470,13 +470,14 @@
*/
hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
hptSizePages = (u32)HvCallHpt_getHptPages();
- hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
+ hptSizeChunks = hptSizePages >>
+ (MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT);
hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
printk("HPT absolute addr = %016lx, size = %dK\n",
chunk_to_addr(hptFirstChunk), hptSizeChunks * 256);
- ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE);
+ ppc64_pft_size = __ilog2(hptSizePages * HW_PAGE_SIZE);
/*
* The actual hashed page table is in the hypervisor,
@@ -629,7 +630,7 @@
*/
if (naca.xRamDisk)
klimit = KERNELBASE + (u64)naca.xRamDisk +
- (naca.xRamDiskSize * PAGE_SIZE);
+ (naca.xRamDiskSize * HW_PAGE_SIZE);
else {
/*
* No ram disk was included - check and see if there
diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c
index c27a668..384360e 100644
--- a/arch/powerpc/platforms/iseries/vio.c
+++ b/arch/powerpc/platforms/iseries/vio.c
@@ -30,41 +30,14 @@
static void __init iommu_vio_init(void)
{
- struct iommu_table *t;
- struct iommu_table_cb cb;
- unsigned long cbp;
- unsigned long itc_entries;
+ iommu_table_getparms_iSeries(255, 0, 0xff, &veth_iommu_table);
+ veth_iommu_table.it_size /= 2;
+ vio_iommu_table = veth_iommu_table;
+ vio_iommu_table.it_offset += veth_iommu_table.it_size;
- cb.itc_busno = 255; /* Bus 255 is the virtual bus */
- cb.itc_virtbus = 0xff; /* Ask for virtual bus */
-
- cbp = virt_to_abs(&cb);
- HvCallXm_getTceTableParms(cbp);
-
- itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
- veth_iommu_table.it_size = itc_entries / 2;
- veth_iommu_table.it_busno = cb.itc_busno;
- veth_iommu_table.it_offset = cb.itc_offset;
- veth_iommu_table.it_index = cb.itc_index;
- veth_iommu_table.it_type = TCE_VB;
- veth_iommu_table.it_blocksize = 1;
-
- t = iommu_init_table(&veth_iommu_table);
-
- if (!t)
+ if (!iommu_init_table(&veth_iommu_table))
printk("Virtual Bus VETH TCE table failed.\n");
-
- vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size;
- vio_iommu_table.it_busno = cb.itc_busno;
- vio_iommu_table.it_offset = cb.itc_offset +
- veth_iommu_table.it_size;
- vio_iommu_table.it_index = cb.itc_index;
- vio_iommu_table.it_type = TCE_VB;
- vio_iommu_table.it_blocksize = 1;
-
- t = iommu_init_table(&vio_iommu_table);
-
- if (!t)
+ if (!iommu_init_table(&vio_iommu_table))
printk("Virtual Bus VIO TCE table failed.\n");
}
diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index fe97bfb..84267269 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -68,7 +68,8 @@
* For each kind of event we allocate a buffer that is
* guaranteed not to cross a page boundary
*/
-static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned;
+static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256]
+ __attribute__((__aligned__(4096)));
static atomic_t event_buffer_available[VIO_MAX_SUBTYPES];
static int event_buffer_initialised;
@@ -116,12 +117,12 @@
HvLpEvent_Rc hvrc;
DECLARE_MUTEX_LOCKED(Semaphore);
- buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL);
if (!buf)
return 0;
- memset(buf, 0, PAGE_SIZE);
+ memset(buf, 0, HW_PAGE_SIZE);
- handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE,
+ handle = dma_map_single(iSeries_vio_dev, buf, HW_PAGE_SIZE,
DMA_FROM_DEVICE);
hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
@@ -131,7 +132,7 @@
viopath_sourceinst(viopath_hostLp),
viopath_targetinst(viopath_hostLp),
(u64)(unsigned long)&Semaphore, VIOVERSION << 16,
- ((u64)handle) << 32, PAGE_SIZE, 0, 0);
+ ((u64)handle) << 32, HW_PAGE_SIZE, 0, 0);
if (hvrc != HvLpEvent_Rc_Good)
printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc);
@@ -140,7 +141,7 @@
vlanMap = HvLpConfig_getVirtualLanIndexMap();
- buf[PAGE_SIZE-1] = '\0';
+ buf[HW_PAGE_SIZE-1] = '\0';
seq_printf(m, "%s", buf);
seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap);
seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n",
@@ -152,7 +153,8 @@
e2a(xItExtVpdPanel.systemSerial[4]),
e2a(xItExtVpdPanel.systemSerial[5]));
- dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
+ dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE,
+ DMA_FROM_DEVICE);
kfree(buf);
return 0;