blob: 3d98ba82ea67a321620c94e387276700c65b7081 [file] [log] [blame]
/*
 *  arch/s390/mm/pgtable.c
 *
 *    Copyright IBM Corp. 2007
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
7
8#include <linux/sched.h>
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/mm.h>
12#include <linux/swap.h>
13#include <linux/smp.h>
14#include <linux/highmem.h>
15#include <linux/slab.h>
16#include <linux/pagemap.h>
17#include <linux/spinlock.h>
18#include <linux/module.h>
19#include <linux/quicklist.h>
20
21#include <asm/system.h>
22#include <asm/pgtable.h>
23#include <asm/pgalloc.h>
24#include <asm/tlb.h>
25#include <asm/tlbflush.h>
Martin Schwidefsky6252d702008-02-09 18:24:37 +010026#include <asm/mmu_context.h>
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020027
28#ifndef CONFIG_64BIT
29#define ALLOC_ORDER 1
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010030#define TABLES_PER_PAGE 4
31#define FRAG_MASK 15UL
32#define SECOND_HALVES 10UL
Carsten Otte402b0862008-03-25 18:47:10 +010033
34void clear_table_pgstes(unsigned long *table)
35{
36 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
37 memset(table + 256, 0, PAGE_SIZE/4);
38 clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
39 memset(table + 768, 0, PAGE_SIZE/4);
40}
41
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020042#else
43#define ALLOC_ORDER 2
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010044#define TABLES_PER_PAGE 2
45#define FRAG_MASK 3UL
46#define SECOND_HALVES 2UL
Carsten Otte402b0862008-03-25 18:47:10 +010047
48void clear_table_pgstes(unsigned long *table)
49{
50 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
51 memset(table + 256, 0, PAGE_SIZE/2);
52}
53
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020054#endif
55
56unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
57{
58 struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
59
60 if (!page)
61 return NULL;
62 page->index = 0;
63 if (noexec) {
64 struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
65 if (!shadow) {
66 __free_pages(page, ALLOC_ORDER);
67 return NULL;
68 }
69 page->index = page_to_phys(shadow);
70 }
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010071 spin_lock(&mm->page_table_lock);
72 list_add(&page->lru, &mm->context.crst_list);
73 spin_unlock(&mm->page_table_lock);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020074 return (unsigned long *) page_to_phys(page);
75}
76
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010077void crst_table_free(struct mm_struct *mm, unsigned long *table)
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020078{
79 unsigned long *shadow = get_shadow_table(table);
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010080 struct page *page = virt_to_page(table);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020081
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010082 spin_lock(&mm->page_table_lock);
83 list_del(&page->lru);
84 spin_unlock(&mm->page_table_lock);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020085 if (shadow)
86 free_pages((unsigned long) shadow, ALLOC_ORDER);
87 free_pages((unsigned long) table, ALLOC_ORDER);
88}
89
Martin Schwidefsky6252d702008-02-09 18:24:37 +010090#ifdef CONFIG_64BIT
91int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
92{
93 unsigned long *table, *pgd;
94 unsigned long entry;
95
96 BUG_ON(limit > (1UL << 53));
97repeat:
98 table = crst_table_alloc(mm, mm->context.noexec);
99 if (!table)
100 return -ENOMEM;
101 spin_lock(&mm->page_table_lock);
102 if (mm->context.asce_limit < limit) {
103 pgd = (unsigned long *) mm->pgd;
104 if (mm->context.asce_limit <= (1UL << 31)) {
105 entry = _REGION3_ENTRY_EMPTY;
106 mm->context.asce_limit = 1UL << 42;
107 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
108 _ASCE_USER_BITS |
109 _ASCE_TYPE_REGION3;
110 } else {
111 entry = _REGION2_ENTRY_EMPTY;
112 mm->context.asce_limit = 1UL << 53;
113 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
114 _ASCE_USER_BITS |
115 _ASCE_TYPE_REGION2;
116 }
117 crst_table_init(table, entry);
118 pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
119 mm->pgd = (pgd_t *) table;
120 table = NULL;
121 }
122 spin_unlock(&mm->page_table_lock);
123 if (table)
124 crst_table_free(mm, table);
125 if (mm->context.asce_limit < limit)
126 goto repeat;
127 update_mm(mm, current);
128 return 0;
129}
130
131void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
132{
133 pgd_t *pgd;
134
135 if (mm->context.asce_limit <= limit)
136 return;
137 __tlb_flush_mm(mm);
138 while (mm->context.asce_limit > limit) {
139 pgd = mm->pgd;
140 switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
141 case _REGION_ENTRY_TYPE_R2:
142 mm->context.asce_limit = 1UL << 42;
143 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
144 _ASCE_USER_BITS |
145 _ASCE_TYPE_REGION3;
146 break;
147 case _REGION_ENTRY_TYPE_R3:
148 mm->context.asce_limit = 1UL << 31;
149 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
150 _ASCE_USER_BITS |
151 _ASCE_TYPE_SEGMENT;
152 break;
153 default:
154 BUG();
155 }
156 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
157 crst_table_free(mm, (unsigned long *) pgd);
158 }
159 update_mm(mm, current);
160}
161#endif
162
/*
 * page table entry allocation/free routines.
 */
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100166unsigned long *page_table_alloc(struct mm_struct *mm)
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200167{
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100168 struct page *page;
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200169 unsigned long *table;
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100170 unsigned long bits;
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200171
Carsten Otte402b0862008-03-25 18:47:10 +0100172 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100173 spin_lock(&mm->page_table_lock);
174 page = NULL;
175 if (!list_empty(&mm->context.pgtable_list)) {
176 page = list_first_entry(&mm->context.pgtable_list,
177 struct page, lru);
178 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
179 page = NULL;
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200180 }
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100181 if (!page) {
182 spin_unlock(&mm->page_table_lock);
183 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
184 if (!page)
185 return NULL;
186 pgtable_page_ctor(page);
187 page->flags &= ~FRAG_MASK;
188 table = (unsigned long *) page_to_phys(page);
Carsten Otte402b0862008-03-25 18:47:10 +0100189 if (mm->context.pgstes)
190 clear_table_pgstes(table);
191 else
192 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100193 spin_lock(&mm->page_table_lock);
194 list_add(&page->lru, &mm->context.pgtable_list);
195 }
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200196 table = (unsigned long *) page_to_phys(page);
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100197 while (page->flags & bits) {
198 table += 256;
199 bits <<= 1;
200 }
201 page->flags |= bits;
202 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
203 list_move_tail(&page->lru, &mm->context.pgtable_list);
204 spin_unlock(&mm->page_table_lock);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200205 return table;
206}
207
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100208void page_table_free(struct mm_struct *mm, unsigned long *table)
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200209{
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100210 struct page *page;
211 unsigned long bits;
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200212
Carsten Otte402b0862008-03-25 18:47:10 +0100213 bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100214 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
215 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
216 spin_lock(&mm->page_table_lock);
217 page->flags ^= bits;
218 if (page->flags & FRAG_MASK) {
219 /* Page now has some free pgtable fragments. */
220 list_move(&page->lru, &mm->context.pgtable_list);
221 page = NULL;
222 } else
223 /* All fragments of the 4K page have been freed. */
224 list_del(&page->lru);
225 spin_unlock(&mm->page_table_lock);
226 if (page) {
227 pgtable_page_dtor(page);
228 __free_page(page);
229 }
230}
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200231
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100232void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
233{
234 struct page *page;
235
236 spin_lock(&mm->page_table_lock);
237 /* Free shadow region and segment tables. */
238 list_for_each_entry(page, &mm->context.crst_list, lru)
239 if (page->index) {
240 free_pages((unsigned long) page->index, ALLOC_ORDER);
241 page->index = 0;
242 }
243 /* "Free" second halves of page tables. */
244 list_for_each_entry(page, &mm->context.pgtable_list, lru)
245 page->flags &= ~SECOND_HALVES;
246 spin_unlock(&mm->page_table_lock);
247 mm->context.noexec = 0;
248 update_mm(mm, tsk);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200249}
Carsten Otte402b0862008-03-25 18:47:10 +0100250
251/*
252 * switch on pgstes for its userspace process (for kvm)
253 */
254int s390_enable_sie(void)
255{
256 struct task_struct *tsk = current;
Christian Borntraeger74b6b522008-05-21 13:37:29 +0200257 struct mm_struct *mm, *old_mm;
Carsten Otte402b0862008-03-25 18:47:10 +0100258
Christian Borntraeger74b6b522008-05-21 13:37:29 +0200259 /* Do we have pgstes? if yes, we are done */
Carsten Otte402b0862008-03-25 18:47:10 +0100260 if (tsk->mm->context.pgstes)
Christian Borntraeger74b6b522008-05-21 13:37:29 +0200261 return 0;
Carsten Otte402b0862008-03-25 18:47:10 +0100262
Christian Borntraeger74b6b522008-05-21 13:37:29 +0200263 /* lets check if we are allowed to replace the mm */
264 task_lock(tsk);
Carsten Otte402b0862008-03-25 18:47:10 +0100265 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
Christian Borntraeger74b6b522008-05-21 13:37:29 +0200266 tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
267 task_unlock(tsk);
268 return -EINVAL;
269 }
270 task_unlock(tsk);
Carsten Otte402b0862008-03-25 18:47:10 +0100271
Christian Borntraeger74b6b522008-05-21 13:37:29 +0200272 /* we copy the mm with pgstes enabled */
273 tsk->mm->context.pgstes = 1;
Carsten Otte402b0862008-03-25 18:47:10 +0100274 mm = dup_mm(tsk);
275 tsk->mm->context.pgstes = 0;
Carsten Otte402b0862008-03-25 18:47:10 +0100276 if (!mm)
Christian Borntraeger74b6b522008-05-21 13:37:29 +0200277 return -ENOMEM;
278
279 /* Now lets check again if somebody attached ptrace etc */
280 task_lock(tsk);
281 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
282 tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
283 mmput(mm);
284 task_unlock(tsk);
285 return -EINVAL;
286 }
287
288 /* ok, we are alone. No ptrace, no threads, etc. */
289 old_mm = tsk->mm;
Carsten Otte402b0862008-03-25 18:47:10 +0100290 tsk->mm = tsk->active_mm = mm;
291 preempt_disable();
292 update_mm(mm, tsk);
293 cpu_set(smp_processor_id(), mm->cpu_vm_mask);
294 preempt_enable();
Carsten Otte402b0862008-03-25 18:47:10 +0100295 task_unlock(tsk);
Christian Borntraeger74b6b522008-05-21 13:37:29 +0200296 mmput(old_mm);
297 return 0;
Carsten Otte402b0862008-03-25 18:47:10 +0100298}
299EXPORT_SYMBOL_GPL(s390_enable_sie);