s390/mm: implement dirty bits for large segment table entries
The large segment table entry format has a block of bits for the
ACC/F values for the large page. These bits are valid only if
another bit (AV bit 0x10000) of the segment table entry is set.
The ACC/F bits do not have a meaning if the AV bit is off.
This allows putting the THP splitting bit, the segment young bit
and the new segment dirty bit into the ACC/F bits as long as
the AV bit stays off. The dirty and young information is only
available if the pmd is large.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index fcba5e0..b76317c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -287,7 +287,14 @@
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
-#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
+
+#define _SEGMENT_ENTRY_DIRTY 0 /* No sw dirty bit for 31-bit */
+#define _SEGMENT_ENTRY_YOUNG 0 /* No sw young bit for 31-bit */
+#define _SEGMENT_ENTRY_READ 0 /* No sw read bit for 31-bit */
+#define _SEGMENT_ENTRY_WRITE 0 /* No sw write bit for 31-bit */
+#define _SEGMENT_ENTRY_LARGE 0 /* No large pages for 31-bit */
+#define _SEGMENT_ENTRY_BITS_LARGE 0
+#define _SEGMENT_ENTRY_ORIGIN_LARGE 0
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
@@ -350,7 +357,7 @@
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
@@ -359,30 +366,34 @@
#define _SEGMENT_ENTRY (0)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
-#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
-#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
-#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
-#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
-#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
+#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
+#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
+#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */
+#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
+#define _SEGMENT_ENTRY_CO 0x0100 /* change-recording override */
+#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */
+#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */
/*
* Segment table entry encoding (R = read-only, I = invalid, y = young bit):
- * ..R...I...y.
- * prot-none, old ..0...1...1.
- * prot-none, young ..1...1...1.
- * read-only, old ..1...1...0.
- * read-only, young ..1...0...1.
- * read-write, old ..0...1...0.
- * read-write, young ..0...0...1.
+ * dy..R...I...wr
+ * prot-none, clean, old 00..1...1...00
+ * prot-none, clean, young 01..1...1...00
+ * prot-none, dirty, old 10..1...1...00
+ * prot-none, dirty, young 11..1...1...00
+ * read-only, clean, old 00..1...1...01
+ * read-only, clean, young 01..1...0...01
+ * read-only, dirty, old 10..1...1...01
+ * read-only, dirty, young 11..1...0...01
+ * read-write, clean, old 00..1...1...11
+ * read-write, clean, young 01..1...0...11
+ * read-write, dirty, old 10..0...1...11
+ * read-write, dirty, young 11..0...0...11
* The segment table origin is used to distinguish empty (origin==0) from
* read-write, old segment table entries (origin!=0)
*/
-#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
-
-/* Set of bits not changed in pmd_modify */
-#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
- | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
+#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */
/* Page status table bits for virtualization */
#define PGSTE_ACC_BITS 0xf000000000000000UL
@@ -455,10 +466,11 @@
* Segment entry (large page) protection definitions.
*/
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
- _SEGMENT_ENTRY_NONE)
-#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
_SEGMENT_ENTRY_PROTECT)
-#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
+#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT | \
+ _SEGMENT_ENTRY_READ)
+#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_WRITE)
static inline int mm_has_pgste(struct mm_struct *mm)
{
@@ -569,25 +581,23 @@
static inline int pmd_large(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
-#else
- return 0;
-#endif
}
-static inline int pmd_prot_none(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
- (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
+ unsigned long origin_mask = _SEGMENT_ENTRY_ORIGIN;
+
+ /* Large entries use their own origin mask; a pfn may not fit an int. */
+ if (pmd_large(pmd))
+ origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+ return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
}
static inline int pmd_bad(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
if (pmd_large(pmd))
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
-#endif
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
@@ -607,20 +617,22 @@
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
- if (pmd_prot_none(pmd))
- return 0;
- return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
+}
+
+static inline int pmd_dirty(pmd_t pmd)
+{
+ int dirty = 1;
+ if (pmd_large(pmd))
+ dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
+ return dirty;
}
static inline int pmd_young(pmd_t pmd)
{
- int young = 0;
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd))
- young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
- else
+ int young = 1;
+ if (pmd_large(pmd))
young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
-#endif
return young;
}
@@ -1391,7 +1403,7 @@
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
/* Find an entry in the lowest level page table.. */
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
@@ -1413,41 +1425,75 @@
return pgprot_val(SEGMENT_WRITE);
}
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
+ if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ return pmd;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ }
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ }
+ return pmd;
+}
+
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd)) {
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- } else {
+ if (pmd_large(pmd)) {
pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
}
-#endif
return pmd;
}
static inline pmd_t pmd_mkold(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd)) {
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
- } else {
+ if (pmd_large(pmd)) {
pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
}
-#endif
return pmd;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- int young;
-
- young = pmd_young(pmd);
- pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
+ _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
+ _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT;
+ pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ return pmd;
+ }
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
- if (young)
- pmd = pmd_mkyoung(pmd);
return pmd;
}
@@ -1455,16 +1501,9 @@
{
pmd_t __pmd;
pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
- return pmd_mkyoung(__pmd);
+ return __pmd;
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
-{
- /* Do not clobber PROT_NONE segments! */
- if (!pmd_prot_none(pmd))
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
- return pmd;
-}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
static inline void __pmdp_csp(pmd_t *pmdp)
@@ -1555,34 +1594,21 @@
static inline int pmd_trans_splitting(pmd_t pmd)
{
- return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) &&
+ (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
- if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1)
- pmd_val(entry) |= _SEGMENT_ENTRY_CO;
*pmdp = entry;
}
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
- return pmd;
-}
-
-static inline pmd_t pmd_wrprotect(pmd_t pmd)
-{
- /* Do not clobber PROT_NONE segments! */
- if (!pmd_prot_none(pmd))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- return pmd;
-}
-
-static inline pmd_t pmd_mkdirty(pmd_t pmd)
-{
- /* No dirty bit in the segment table entry. */
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd;
}
@@ -1647,11 +1673,6 @@
{
return MACHINE_HAS_HPAGE ? 1 : 0;
}
-
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- return pmd_val(pmd) >> PAGE_SHIFT;
-}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 0ff66a7..389bc17 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -10,42 +10,33 @@
static inline pmd_t __pte_to_pmd(pte_t pte)
{
- int none, young, prot;
pmd_t pmd;
/*
- * Convert encoding pte bits pmd bits
- * .IR...wrdytp ..R...I...y.
- * empty .10...000000 -> ..0...1...0.
- * prot-none, clean, old .11...000001 -> ..0...1...1.
- * prot-none, clean, young .11...000101 -> ..1...1...1.
- * prot-none, dirty, old .10...001001 -> ..0...1...1.
- * prot-none, dirty, young .10...001101 -> ..1...1...1.
- * read-only, clean, old .11...010001 -> ..1...1...0.
- * read-only, clean, young .01...010101 -> ..1...0...1.
- * read-only, dirty, old .11...011001 -> ..1...1...0.
- * read-only, dirty, young .01...011101 -> ..1...0...1.
- * read-write, clean, old .11...110001 -> ..0...1...0.
- * read-write, clean, young .01...110101 -> ..0...0...1.
- * read-write, dirty, old .10...111001 -> ..0...1...0.
- * read-write, dirty, young .00...111101 -> ..0...0...1.
- * Huge ptes are dirty by definition, a clean pte is made dirty
- * by the conversion.
+ * Convert encoding pte bits pmd bits
+ * .IR...wrdytp dy..R...I...wr
+ * empty .10...000000 -> 00..0...1...00
+ * prot-none, clean, old .11...000001 -> 00..1...1...00
+ * prot-none, clean, young .11...000101 -> 01..1...1...00
+ * prot-none, dirty, old .10...001001 -> 10..1...1...00
+ * prot-none, dirty, young .10...001101 -> 11..1...1...00
+ * read-only, clean, old .11...010001 -> 00..1...1...01
+ * read-only, clean, young .01...010101 -> 01..1...0...01
+ * read-only, dirty, old .11...011001 -> 10..1...1...01
+ * read-only, dirty, young .01...011101 -> 11..1...0...01
+ * read-write, clean, old .11...110001 -> 00..0...1...11
+ * read-write, clean, young .01...110101 -> 01..0...0...11
+ * read-write, dirty, old .10...111001 -> 10..0...1...11
+ * read-write, dirty, young .00...111101 -> 11..0...0...11
*/
if (pte_present(pte)) {
pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
- if (pte_val(pte) & _PAGE_INVALID)
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
- none = (pte_val(pte) & _PAGE_PRESENT) &&
- !(pte_val(pte) & _PAGE_READ) &&
- !(pte_val(pte) & _PAGE_WRITE);
- prot = (pte_val(pte) & _PAGE_PROTECT) &&
- !(pte_val(pte) & _PAGE_WRITE);
- young = pte_val(pte) & _PAGE_YOUNG;
- if (none || young)
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- if (prot || (none && young))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
} else
pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
return pmd;
@@ -56,34 +47,31 @@
pte_t pte;
/*
- * Convert encoding pmd bits pte bits
- * ..R...I...y. .IR...wrdytp
- * empty ..0...1...0. -> .10...000000
- * prot-none, old ..0...1...1. -> .10...001001
- * prot-none, young ..1...1...1. -> .10...001101
- * read-only, old ..1...1...0. -> .11...011001
- * read-only, young ..1...0...1. -> .01...011101
- * read-write, old ..0...1...0. -> .10...111001
- * read-write, young ..0...0...1. -> .00...111101
- * Huge ptes are dirty by definition
+ * Convert encoding pmd bits pte bits
+ * dy..R...I...wr .IR...wrdytp
+ * empty 00..0...1...00 -> .10...001100
+ * prot-none, clean, old 00..0...1...00 -> .10...000001
+ * prot-none, clean, young 01..0...1...00 -> .10...000101
+ * prot-none, dirty, old 10..0...1...00 -> .10...001001
+ * prot-none, dirty, young 11..0...1...00 -> .10...001101
+ * read-only, clean, old 00..1...1...01 -> .11...010001
+ * read-only, clean, young 01..1...1...01 -> .11...010101
+ * read-only, dirty, old 10..1...1...01 -> .11...011001
+ * read-only, dirty, young 11..1...1...01 -> .11...011101
+ * read-write, clean, old 00..0...1...11 -> .10...110001
+ * read-write, clean, young 01..0...1...11 -> .10...110101
+ * read-write, dirty, old 10..0...1...11 -> .10...111001
+ * read-write, dirty, young 11..0...1...11 -> .10...111101
*/
if (pmd_present(pmd)) {
- pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
- (pmd_val(pmd) & PAGE_MASK);
- if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
- pte_val(pte) |= _PAGE_INVALID;
- if (pmd_prot_none(pmd)) {
- if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
- pte_val(pte) |= _PAGE_YOUNG;
- } else {
- pte_val(pte) |= _PAGE_READ;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
- pte_val(pte) |= _PAGE_PROTECT;
- else
- pte_val(pte) |= _PAGE_WRITE;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
- pte_val(pte) |= _PAGE_YOUNG;
- }
+ pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
+ pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
} else
pte_val(pte) = _PAGE_INVALID;
return pte;
@@ -96,6 +84,7 @@
pmd = __pte_to_pmd(pte);
if (!MACHINE_HAS_HPAGE) {
+ /* Emulated huge ptes lose the dirty and young bit */
pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= pte_page(pte)[1].index;
} else
@@ -113,6 +102,8 @@
origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= *(unsigned long *) origin;
+ /* Emulated huge ptes are young and dirty by definition */
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
}
return __pmd_to_pte(pmd);
}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index f90ad85..19daa53 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1433,6 +1433,9 @@
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ entry = pmd_mkyoung(entry);
+ if (dirty)
+ entry = pmd_mkdirty(entry);
if (pmd_same(*pmdp, entry))
return 0;
pmdp_invalidate(vma, address, pmdp);