/*
 * Handle caching attributes in page tables (PAT)
 *
 * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *          Suresh B Siddha <suresh.b.siddha@intel.com>
 *
 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
 */
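/*
 * PAT (Page Attribute Table) lets the caching attribute of a mapping be
 * selected per page-table entry (via the PWT, PCD and PAT bits), complementing
 * the range-based MTRRs. This file programs the PAT MSR and tracks the memory
 * type requested for each physical address range.
 */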
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/fs.h>

#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/pat.h>
#include <asm/e820.h>
#include <asm/cacheflush.h>
#include <asm/fcntl.h>
#include <asm/mtrr.h>

int pat_wc_enabled = 1;

static u64 __read_mostly boot_pat_state;

static int nopat(char *str)
{
	pat_wc_enabled = 0;
	printk(KERN_INFO "x86: PAT support disabled.\n");

	return 0;
}
early_param("nopat", nopat);
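/*
 * Booting with "nopat" on the kernel command line runs the handler above via
 * early_param() and clears pat_wc_enabled, so pat_known_cpu() and pat_init()
 * below return without touching the PAT MSR.
 */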

static int pat_known_cpu(void)
{
	if (!pat_wc_enabled)
		return 0;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    (boot_cpu_data.x86 == 0xF ||
	     (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model >= 15))) {
		if (cpu_has_pat) {
			return 1;
		}
	}
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x11) {
		if (cpu_has_pat) {
			return 1;
		}
	}

	pat_wc_enabled = 0;
	printk(KERN_INFO "CPU and/or kernel does not support PAT.\n");
	return 0;
}

enum {
	PAT_UC = 0,		/* uncached */
	PAT_WC = 1,		/* Write combining */
	PAT_WT = 4,		/* Write Through */
	PAT_WP = 5,		/* Write Protected */
	PAT_WB = 6,		/* Write Back (default) */
	PAT_UC_MINUS = 7,	/* UC, but can be overridden by MTRR */
};

#define PAT(x, y)	((u64)PAT_ ## y << ((x) * 8))
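/*
 * For example, PAT(1, WC) expands to (u64)PAT_WC << 8, i.e. 0x100: it places
 * the WC encoding (1) into PAT entry 1, which is the entry selected by a PTE
 * with PWT set and PCD/PAT clear.
 */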

void pat_init(void)
{
	u64 pat;

#ifndef CONFIG_X86_PAT
	nopat(NULL);
#endif

	/* Boot CPU enables PAT based on CPU feature */
	if (!smp_processor_id() && !pat_known_cpu())
		return;

	/* APs enable PAT iff boot CPU has enabled it before */
	if (smp_processor_id() && !pat_wc_enabled)
		return;

	/* Set PWT to Write-Combining. All other bits stay the same */
	/*
	 * PTE encoding used in Linux:
	 *	PAT
	 *	|PCD
	 *	||PWT
	 *	|||
	 *	000 WB		_PAGE_CACHE_WB
	 *	001 WC		_PAGE_CACHE_WC
	 *	010 UC-		_PAGE_CACHE_UC_MINUS
	 *	011 UC		_PAGE_CACHE_UC
	 * PAT bit unused
	 */
	pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
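	/*
	 * With the layout above the value written to MSR_IA32_CR_PAT works
	 * out to 0x0007010600070106: entries 4-7 mirror entries 0-3, since
	 * the PTE PAT bit is left unused by Linux here.
	 */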

	/* Boot CPU check */
	if (!smp_processor_id()) {
		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
	}

	wrmsrl(MSR_IA32_CR_PAT, pat);
	printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
	       smp_processor_id(), boot_pat_state, pat);
}

#undef PAT

static char *cattr_name(unsigned long flags)
{
	switch (flags & _PAGE_CACHE_MASK) {
	case _PAGE_CACHE_UC:		return "uncached";
	case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
	case _PAGE_CACHE_WB:		return "write-back";
	case _PAGE_CACHE_WC:		return "write-combining";
	default:			return "broken";
	}
}

/*
 * The global memtype list keeps track of the memory type for specific
 * physical memory areas. Conflicting memory types in different
 * mappings can cause CPU cache corruption. To avoid this we keep track.
 *
 * The list is sorted on starting address and can contain multiple
 * entries for each address (this allows reference counting for overlapping
 * areas). All the aliases have the same cache attributes, of course.
 * Zero attributes are represented as holes.
 *
 * Currently the data structure is a list because the number of mappings
 * is expected to be relatively small. If this should become a problem
 * it could be changed to an rbtree or similar.
 *
 * memtype_lock protects the whole list.
 */

struct memtype {
	u64 start;
	u64 end;
	unsigned long type;
	struct list_head nd;
};

static LIST_HEAD(memtype_list);
static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype list */
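/*
 * Illustrative example (hypothetical addresses): if two callers each reserve
 * 0xd0000000-0xd0100000 as write-combining, the list ends up with two entries
 * covering the same range and type; each free_memtype() call then removes one
 * matching entry, so the range stays tracked until the last user is gone.
 */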

/*
 * Computes the intersection of the PAT memory type and the MTRR memory type
 * and returns the result as a PAT memory type.
 * (PAT and MTRR types are encoded with different values.)
 * The intersection is based on the "Effective Memory Type" tables in the
 * IA-32 SDM, vol 3a.
 */
static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
				unsigned long *ret_prot)
{
	unsigned long pat_type;
	u8 mtrr_type;

	mtrr_type = mtrr_type_lookup(start, end);
	if (mtrr_type == 0xFF) {		/* MTRR not enabled */
		*ret_prot = prot;
		return 0;
	}
	if (mtrr_type == 0xFE) {		/* MTRR match error */
		*ret_prot = _PAGE_CACHE_UC;
		return -1;
	}
	if (mtrr_type != MTRR_TYPE_UNCACHABLE &&
	    mtrr_type != MTRR_TYPE_WRBACK &&
	    mtrr_type != MTRR_TYPE_WRCOMB) {	/* MTRR type unhandled */
		*ret_prot = _PAGE_CACHE_UC;
		return -1;
	}

	pat_type = prot & _PAGE_CACHE_MASK;
	prot &= (~_PAGE_CACHE_MASK);

	/* Currently doing intersection by hand. Optimize it later. */
	if (pat_type == _PAGE_CACHE_WC) {
		*ret_prot = prot | _PAGE_CACHE_WC;
	} else if (pat_type == _PAGE_CACHE_UC_MINUS) {
		*ret_prot = prot | _PAGE_CACHE_UC_MINUS;
	} else if (pat_type == _PAGE_CACHE_UC ||
		   mtrr_type == MTRR_TYPE_UNCACHABLE) {
		*ret_prot = prot | _PAGE_CACHE_UC;
	} else if (mtrr_type == MTRR_TYPE_WRCOMB) {
		*ret_prot = prot | _PAGE_CACHE_WC;
	} else {
		*ret_prot = prot | _PAGE_CACHE_WB;
	}

	return 0;
}
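/*
 * For example, a _PAGE_CACHE_WB request for a range that the MTRRs mark as
 * MTRR_TYPE_WRCOMB is degraded to _PAGE_CACHE_WC, while the same request over
 * an MTRR_TYPE_UNCACHABLE range comes back as _PAGE_CACHE_UC.
 */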

int reserve_memtype(u64 start, u64 end, unsigned long req_type,
			unsigned long *ret_type)
{
	struct memtype *new_entry = NULL;
	struct memtype *parse;
	unsigned long actual_type;
	int err = 0;

	/* Only track when pat_wc_enabled */
	if (!pat_wc_enabled) {
		if (ret_type)
			*ret_type = req_type;

		return 0;
	}

	/* Low ISA region is always mapped WB in page table. No need to track */
	if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
		if (ret_type)
			*ret_type = _PAGE_CACHE_WB;

		return 0;
	}

	req_type &= _PAGE_CACHE_MASK;
	err = pat_x_mtrr_type(start, end, req_type, &actual_type);
	if (err) {
		if (ret_type)
			*ret_type = actual_type;

		return -EINVAL;
	}

	new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
	if (!new_entry)
		return -ENOMEM;

	new_entry->start = start;
	new_entry->end = end;
	new_entry->type = actual_type;

	if (ret_type)
		*ret_type = actual_type;

	spin_lock(&memtype_lock);

	/* Search for existing mapping that overlaps the current range */
	list_for_each_entry(parse, &memtype_list, nd) {
		struct memtype *saved_ptr;

		if (parse->start >= end) {
			printk(KERN_DEBUG "New Entry\n");
			list_add(&new_entry->nd, parse->nd.prev);
			new_entry = NULL;
			break;
		}

		if (start <= parse->start && end >= parse->start) {
			if (actual_type != parse->type && ret_type) {
				actual_type = parse->type;
				*ret_type = actual_type;
				new_entry->type = actual_type;
			}

			if (actual_type != parse->type) {
				printk(KERN_INFO
					"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
					current->comm, current->pid,
					start, end,
					cattr_name(actual_type),
					cattr_name(parse->type));
				err = -EBUSY;
				break;
			}

			saved_ptr = parse;
			/*
			 * Check to see whether the request overlaps more
			 * than one entry in the list
			 */
			list_for_each_entry_continue(parse, &memtype_list, nd) {
				if (end <= parse->start) {
					break;
				}

				if (actual_type != parse->type) {
					printk(KERN_INFO
						"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
						current->comm, current->pid,
						start, end,
						cattr_name(actual_type),
						cattr_name(parse->type));
					err = -EBUSY;
					break;
				}
			}

			if (err) {
				break;
			}

			printk(KERN_DEBUG "Overlap at 0x%Lx-0x%Lx\n",
			       saved_ptr->start, saved_ptr->end);
			/* No conflict. Go ahead and add this new entry */
			list_add(&new_entry->nd, saved_ptr->nd.prev);
			new_entry = NULL;
			break;
		}

		if (start < parse->end) {
			if (actual_type != parse->type && ret_type) {
				actual_type = parse->type;
				*ret_type = actual_type;
				new_entry->type = actual_type;
			}

			if (actual_type != parse->type) {
				printk(KERN_INFO
					"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
					current->comm, current->pid,
					start, end,
					cattr_name(actual_type),
					cattr_name(parse->type));
				err = -EBUSY;
				break;
			}

			saved_ptr = parse;
			/*
			 * Check to see whether the request overlaps more
			 * than one entry in the list
			 */
			list_for_each_entry_continue(parse, &memtype_list, nd) {
				if (end <= parse->start) {
					break;
				}

				if (actual_type != parse->type) {
					printk(KERN_INFO
						"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
						current->comm, current->pid,
						start, end,
						cattr_name(actual_type),
						cattr_name(parse->type));
					err = -EBUSY;
					break;
				}
			}

			if (err) {
				break;
			}

			printk(KERN_DEBUG "Overlap at 0x%Lx-0x%Lx\n",
			       saved_ptr->start, saved_ptr->end);
			/* No conflict. Go ahead and add this new entry */
			list_add(&new_entry->nd, &saved_ptr->nd);
			new_entry = NULL;
			break;
		}
	}

	if (err) {
		printk(KERN_INFO
			"reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
			start, end, cattr_name(new_entry->type),
			cattr_name(req_type));
		kfree(new_entry);
		spin_unlock(&memtype_lock);
		return err;
	}

	if (new_entry) {
		/* No conflict. Not yet added to the list. Add to the tail */
		list_add_tail(&new_entry->nd, &memtype_list);
		printk(KERN_DEBUG "New Entry\n");
	}

	if (ret_type) {
		printk(KERN_DEBUG
			"reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
			start, end, cattr_name(actual_type),
			cattr_name(req_type), cattr_name(*ret_type));
	} else {
		printk(KERN_DEBUG
			"reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
			start, end, cattr_name(actual_type),
			cattr_name(req_type));
	}

	spin_unlock(&memtype_lock);
	return err;
}

int free_memtype(u64 start, u64 end)
{
	struct memtype *ml;
	int err = -EINVAL;

	/* Only track when pat_wc_enabled */
	if (!pat_wc_enabled) {
		return 0;
	}

	/* Low ISA region is always mapped WB. No need to track */
	if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
		return 0;
	}

	spin_lock(&memtype_lock);
	list_for_each_entry(ml, &memtype_list, nd) {
		if (ml->start == start && ml->end == end) {
			list_del(&ml->nd);
			kfree(ml);
			err = 0;
			break;
		}
	}
	spin_unlock(&memtype_lock);

	if (err) {
		printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n",
			current->comm, current->pid, start, end);
	}

	printk(KERN_DEBUG "free_memtype request 0x%Lx-0x%Lx\n", start, end);
	return err;
}
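
/*
 * Illustrative usage sketch (not a caller in this file; the address and size
 * below are hypothetical). A caller that wants a write-combining mapping of a
 * physical range would reserve the type first, honour the type actually
 * granted, and release it again when done:
 *
 *	u64 base = 0xd0000000ULL, size = 0x100000ULL;
 *	unsigned long flags;
 *
 *	if (reserve_memtype(base, base + size, _PAGE_CACHE_WC, &flags))
 *		return -EBUSY;	// conflicting type already tracked
 *	// ... map the range using "flags" (may have been downgraded,
 *	//     e.g. to UC- or WB, by the MTRR intersection above) ...
 *	free_memtype(base, base + size);
 */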