blob: cb24defdb6d9a2b9c590d97ba6f26e65f6131fc4 [file] [log] [blame]
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 */
21
22#include <linux/init.h>
23#include <linux/bitmap.h>
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
27#include <linux/sysdev.h>
28#include <linux/spinlock.h>
29#include <linux/pci.h>
30#include <linux/dmar.h>
31#include <linux/dma-mapping.h>
32#include <linux/mempool.h>
33#include "iova.h"
34#include "intel-iommu.h"
35#include <asm/proto.h> /* force_iommu in this header in x86-64*/
36#include <asm/cacheflush.h>
37#include <asm/iommu.h>
38#include "pci.h"
39
40#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
41#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
42
43#define IOAPIC_RANGE_START (0xfee00000)
44#define IOAPIC_RANGE_END (0xfeefffff)
45#define IOVA_START_ADDR (0x1000)
46
47#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
48
49#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
50
51#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
52
53static void domain_remove_dev_info(struct dmar_domain *domain);
54
55static int dmar_disabled;
56static int __initdata dmar_map_gfx = 1;
Keshavamurthy, Anil S7d3b03c2007-10-21 16:41:53 -070057static int dmar_forcedac;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -070058
59#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
60static DEFINE_SPINLOCK(device_domain_lock);
61static LIST_HEAD(device_domain_list);
62
63static int __init intel_iommu_setup(char *str)
64{
65 if (!str)
66 return -EINVAL;
67 while (*str) {
68 if (!strncmp(str, "off", 3)) {
69 dmar_disabled = 1;
70 printk(KERN_INFO"Intel-IOMMU: disabled\n");
71 } else if (!strncmp(str, "igfx_off", 8)) {
72 dmar_map_gfx = 0;
73 printk(KERN_INFO
74 "Intel-IOMMU: disable GFX device mapping\n");
Keshavamurthy, Anil S7d3b03c2007-10-21 16:41:53 -070075 } else if (!strncmp(str, "forcedac", 8)) {
76 printk (KERN_INFO
77 "Intel-IOMMU: Forcing DAC for PCI devices\n");
78 dmar_forcedac = 1;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -070079 }
80
81 str += strcspn(str, ",");
82 while (*str == ',')
83 str++;
84 }
85 return 0;
86}
87__setup("intel_iommu=", intel_iommu_setup);
88
89static struct kmem_cache *iommu_domain_cache;
90static struct kmem_cache *iommu_devinfo_cache;
91static struct kmem_cache *iommu_iova_cache;
92
/*
 * Allocate from @cachep with PF_MEMALLOC temporarily set on the current
 * task so the GFP_ATOMIC allocation may dip into the emergency reserves
 * in low memory situations.
 */
static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
	/*
	 * Restore the caller's PF_MEMALLOC state: flags is either 0 or
	 * PF_MEMALLOC, so this mask clears the bit only when it was
	 * clear on entry.
	 */
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}
105
106
/*
 * Allocate one zeroed page for use as a root/context/dma page table.
 * PF_MEMALLOC is set for the duration so the GFP_ATOMIC allocation can
 * use the emergency reserves (same trick as iommu_kmem_cache_alloc()).
 */
static inline void *alloc_pgtable_page(void)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
	/* restore the caller's PF_MEMALLOC bit (flags is 0 or PF_MEMALLOC) */
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}
119
/* free a page previously obtained from alloc_pgtable_page() */
static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

/*
 * Slab wrappers for the three object caches used by this driver.  All
 * allocations go through iommu_kmem_cache_alloc() so they may use the
 * emergency memory reserves (see above).
 */
static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static inline void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void * alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

/* non-static: the iova allocator (iova.c) calls these back */
struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
154
/*
 * Flush a CPU cache range back to memory when the IOMMU is not
 * cache coherent (ECAP coherency bit clear), so the hardware sees
 * page/context table updates made by the CPU.
 */
static inline void __iommu_flush_cache(
	struct intel_iommu *iommu, void *addr, int size)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(addr, size);
}
161
/*
 * Gets context entry for a given bus and devfn, allocating the per-bus
 * context table on first use.  Returns NULL if that allocation fails.
 * Runs under iommu->lock; note alloc_pgtable_page() is GFP_ATOMIC so
 * allocating with the spinlock held is safe.
 */
static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		/* first device seen on this bus: allocate its context
		 * table and hook it into the root entry, flushing both
		 * so the hardware observes them */
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
189
190static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
191{
192 struct root_entry *root;
193 struct context_entry *context;
194 int ret;
195 unsigned long flags;
196
197 spin_lock_irqsave(&iommu->lock, flags);
198 root = &iommu->root_entry[bus];
199 context = get_context_addr_from_root(root);
200 if (!context) {
201 ret = 0;
202 goto out;
203 }
204 ret = context_present(context[devfn]);
205out:
206 spin_unlock_irqrestore(&iommu->lock, flags);
207 return ret;
208}
209
210static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
211{
212 struct root_entry *root;
213 struct context_entry *context;
214 unsigned long flags;
215
216 spin_lock_irqsave(&iommu->lock, flags);
217 root = &iommu->root_entry[bus];
218 context = get_context_addr_from_root(root);
219 if (context) {
220 context_clear_entry(context[devfn]);
221 __iommu_flush_cache(iommu, &context[devfn], \
222 sizeof(*context));
223 }
224 spin_unlock_irqrestore(&iommu->lock, flags);
225}
226
227static void free_context_table(struct intel_iommu *iommu)
228{
229 struct root_entry *root;
230 int i;
231 unsigned long flags;
232 struct context_entry *context;
233
234 spin_lock_irqsave(&iommu->lock, flags);
235 if (!iommu->root_entry) {
236 goto out;
237 }
238 for (i = 0; i < ROOT_ENTRY_NR; i++) {
239 root = &iommu->root_entry[i];
240 context = get_context_addr_from_root(root);
241 if (context)
242 free_pgtable_page(context);
243 }
244 free_pgtable_page(iommu->root_entry);
245 iommu->root_entry = NULL;
246out:
247 spin_unlock_irqrestore(&iommu->lock, flags);
248}
249
250/* page table handling */
251#define LEVEL_STRIDE (9)
252#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
253
/*
 * AGAW (adjusted guest address width) helpers.  Each page table level
 * decodes LEVEL_STRIDE (9) address bits on top of the 12-bit page
 * offset; a domain with agaw N uses N + 2 table levels and covers
 * 30 + N * 9 address bits.
 */
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;

}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

/* lowest address bit decoded by a table at @level */
static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

/* index of @addr within the page table at @level */
static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

/* mask keeping the address bits decoded at @level and above */
static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

/* number of bytes mapped by one entry at @level */
static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

/* round @addr up to the next @level-sized boundary */
static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}
294
/*
 * Walk the domain's page table down to the leaf (level 1) pte covering
 * @addr, allocating and linking any missing intermediate tables on the
 * way.  Returns the leaf pte, or NULL if a table page could not be
 * allocated.  Serialized by domain->mapping_lock.
 */
static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;
	unsigned long flags;

	BUG_ON(!domain->pgd);

	/* clamp the address to the domain's address space */
	addr &= (((u64)1) << addr_width) - 1;
	parent = domain->pgd;

	spin_lock_irqsave(&domain->mapping_lock, flags);
	while (level > 0) {
		void *tmp_page;

		offset = address_level_offset(addr, level);
		pte = &parent[offset];
		if (level == 1)
			break;

		if (!dma_pte_present(*pte)) {
			/* missing intermediate table: allocate and link it */
			tmp_page = alloc_pgtable_page();

			if (!tmp_page) {
				spin_unlock_irqrestore(&domain->mapping_lock,
					flags);
				return NULL;
			}
			__iommu_flush_cache(domain->iommu, tmp_page,
					PAGE_SIZE_4K);
			dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
			/*
			 * high level table always sets r/w, last level page
			 * table control read/write
			 */
			dma_set_pte_readable(*pte);
			dma_set_pte_writable(*pte);
			__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
		}
		parent = phys_to_virt(dma_pte_addr(*pte));
		level--;
	}

	spin_unlock_irqrestore(&domain->mapping_lock, flags);
	return pte;
}
343
/*
 * Return address's pte at the requested level, or NULL when any table
 * on the path is not present.  Unlike addr_to_dma_pte(), this never
 * allocates.
 */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
		int level)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	/* descend from the top level (total) towards @level */
	while (level <= total) {
		offset = address_level_offset(addr, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(*pte))
			break;
		parent = phys_to_virt(dma_pte_addr(*pte));
		total--;
	}
	return NULL;
}
366
/*
 * Clear the leaf pte mapping one page at @addr, if present.  The
 * caller is responsible for the subsequent IOTLB flush.
 */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
{
	struct dma_pte *pte = NULL;

	/* get last level pte */
	pte = dma_addr_level_pte(domain, addr, 1);

	if (pte) {
		dma_clear_pte(*pte);
		__iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
	}
}
380
/*
 * Clear all leaf ptes in [start, end); a tlb flush should follow.
 * Partial pages at either boundary are excluded (start rounded up,
 * end rounded down to a 4K boundary).
 */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);

	/* clamp both bounds to the domain's address space */
	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;
	/* in case it's partial page */
	start = PAGE_ALIGN_4K(start);
	end &= PAGE_MASK_4K;

	/* we don't need lock here, nobody else touches the iova range */
	while (start < end) {
		dma_pte_clear_one(domain, start);
		start += PAGE_SIZE_4K;
	}
}
398
/*
 * Free page table pages covering [start, end); last level ptes should
 * already be cleared.  Works bottom-up: at each level it frees every
 * table that lies entirely inside the range, then clears the entry
 * pointing to it in the level above.  When the range spans the whole
 * address space the top pgd page is freed as well.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
	u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	u64 tmp;

	/* clamp both bounds to the domain's address space */
	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;

	/* we don't need lock here, nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start, level);
		/* no level-sized region fits completely inside the range */
		if (tmp >= end || (tmp + level_size(level) > end))
			return;

		while (tmp < end) {
			pte = dma_addr_level_pte(domain, tmp, level);
			if (pte) {
				free_pgtable_page(
					phys_to_virt(dma_pte_addr(*pte)));
				dma_clear_pte(*pte);
				__iommu_flush_cache(domain->iommu,
						pte, sizeof(*pte));
			}
			tmp += level_size(level);
		}
		level++;
	}
	/* free pgd */
	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
438
439/* iommu handling */
440static int iommu_alloc_root_entry(struct intel_iommu *iommu)
441{
442 struct root_entry *root;
443 unsigned long flags;
444
445 root = (struct root_entry *)alloc_pgtable_page();
446 if (!root)
447 return -ENOMEM;
448
449 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
450
451 spin_lock_irqsave(&iommu->lock, flags);
452 iommu->root_entry = root;
453 spin_unlock_irqrestore(&iommu->lock, flags);
454
455 return 0;
456}
457
/*
 * Poll an IOMMU register until @cond becomes true.  @op is a register
 * accessor (readl or dmar_readq); the last value read is left in @sts.
 * Panics if the hardware does not respond within
 * DMAR_OPERATION_TIMEOUT.
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
{\
	unsigned long start_time = jiffies;\
	while (1) {\
		sts = op (iommu->reg + offset);\
		if (cond)\
			break;\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");\
		cpu_relax();\
	}\
}
470
/*
 * Program the root entry table's physical address into DMAR_RTADDR_REG
 * and issue the Set Root Table Pointer command, waiting until the
 * hardware acknowledges it (RTPS status bit).
 */
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 cmd, sts;
	unsigned long flag;

	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	cmd = iommu->gcmd | DMA_GCMD_SRTP;
	writel(cmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
491
/*
 * Flush the chipset's internal write buffer so buffered DMA writes
 * reach memory.  Only needed (and only performed) when the hardware
 * advertises the Required Write Buffer Flushing capability (CAP.RWBF).
 */
static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!cap_rwbf(iommu->cap))
		return;
	val = iommu->gcmd | DMA_GCMD_WBF;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(val, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}
510
/*
 * Invalidate the context cache with the requested granularity
 * (@type: global, domain or device selective).  The return value
 * determines if the caller needs a write buffer flush instead:
 * non-zero means no invalidation was issued.
 */
static int __iommu_flush_context(struct intel_iommu *iommu,
	u16 did, u16 source_id, u8 function_mask, u64 type,
	int non_present_entry_flush)
{
	u64 val = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entry we do nothing and if hardware cache non-present
	 * entry, we flush entries of domain 0 (the domain id is used to cache
	 * any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* flushing a context entry will implicitly flush the write buffer */
	return 0;
}
560
561static int inline iommu_flush_context_global(struct intel_iommu *iommu,
562 int non_present_entry_flush)
563{
564 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
565 non_present_entry_flush);
566}
567
568static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
569 int non_present_entry_flush)
570{
571 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
572 non_present_entry_flush);
573}
574
575static int inline iommu_flush_context_device(struct intel_iommu *iommu,
576 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
577{
578 return __iommu_flush_context(iommu, did, source_id, function_mask,
579 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
580}
581
/*
 * Invalidate the IOTLB with the requested granularity (@type: global,
 * domain selective, or page selective over @addr/@size_order).  The
 * return value determines if the caller needs a write buffer flush
 * instead: non-zero means no invalidation was issued.
 */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int size_order, u64 type,
	int non_present_entry_flush)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entry we do nothing and if hardware cache non-present
	 * entry, we flush entries of domain 0 (the domain id is used to cache
	 * any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
	/* flushing an iotlb entry will implicitly flush the write buffer */
	return 0;
}
653
654static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
655 int non_present_entry_flush)
656{
657 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
658 non_present_entry_flush);
659}
660
661static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
662 int non_present_entry_flush)
663{
664 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
665 non_present_entry_flush);
666}
667
668static int iommu_get_alignment(u64 base, unsigned int size)
669{
670 int t = 0;
671 u64 end;
672
673 end = base + size - 1;
674 while (base != end) {
675 t++;
676 base >>= 1;
677 end >>= 1;
678 }
679 return t;
680}
681
/*
 * Page selective IOTLB invalidation of @pages pages starting at @addr
 * (must be 4K aligned).  Falls back to a domain selective flush when
 * the hardware lacks PSI support or the range exceeds the maximum
 * supported address mask.  Return value as for __iommu_flush_iotlb().
 */
static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int pages, int non_present_entry_flush)
{
	unsigned int align;

	BUG_ON(addr & (~PAGE_MASK_4K));
	BUG_ON(pages == 0);

	/* Fallback to domain selective flush if no PSI support */
	if (!cap_pgsel_inv(iommu->cap))
		return iommu_flush_iotlb_dsi(iommu, did,
			non_present_entry_flush);

	/*
	 * PSI requires page size to be 2 ^ x, and the base address is naturally
	 * aligned to the size
	 */
	align = iommu_get_alignment(addr >> PAGE_SHIFT_4K, pages);
	/* Fallback to domain selective flush if size is too big */
	if (align > cap_max_amask_val(iommu->cap))
		return iommu_flush_iotlb_dsi(iommu, did,
			non_present_entry_flush);

	/* round the base address down to the natural alignment */
	addr >>= PAGE_SHIFT_4K + align;
	addr <<= PAGE_SHIFT_4K + align;

	return __iommu_flush_iotlb(iommu, did, addr, align,
		DMA_TLB_PSI_FLUSH, non_present_entry_flush);
}
711
/*
 * Set the Translation Enable bit and wait for the hardware to confirm
 * (TES status bit).  Always returns 0; a non-responding unit panics
 * inside IOMMU_WAIT_OP.
 */
static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_TES), sts);

	/* record the new state only after the hardware accepted it */
	iommu->gcmd |= DMA_GCMD_TE;
	spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}
728
/*
 * Clear the Translation Enable bit and wait for the hardware to
 * confirm (TES status bit clear).  Always returns 0.
 */
static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (!(sts & DMA_GSTS_TES)), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}
745
746static int iommu_init_domains(struct intel_iommu *iommu)
747{
748 unsigned long ndomains;
749 unsigned long nlongs;
750
751 ndomains = cap_ndoms(iommu->cap);
752 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
753 nlongs = BITS_TO_LONGS(ndomains);
754
755 /* TBD: there might be 64K domains,
756 * consider other allocation for future chip
757 */
758 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
759 if (!iommu->domain_ids) {
760 printk(KERN_ERR "Allocating domain id array failed\n");
761 return -ENOMEM;
762 }
763 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
764 GFP_KERNEL);
765 if (!iommu->domains) {
766 printk(KERN_ERR "Allocating domain array failed\n");
767 kfree(iommu->domain_ids);
768 return -ENOMEM;
769 }
770
771 /*
772 * if Caching mode is set, then invalid translations are tagged
773 * with domainid 0. Hence we need to pre-allocate it.
774 */
775 if (cap_caching_mode(iommu->cap))
776 set_bit(0, iommu->domain_ids);
777 return 0;
778}
779
780static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
781{
782 struct intel_iommu *iommu;
783 int ret;
784 int map_size;
785 u32 ver;
786
787 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
788 if (!iommu)
789 return NULL;
790 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
791 if (!iommu->reg) {
792 printk(KERN_ERR "IOMMU: can't map the region\n");
793 goto error;
794 }
795 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
796 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
797
798 /* the registers might be more than one page */
799 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
800 cap_max_fault_reg_offset(iommu->cap));
801 map_size = PAGE_ALIGN_4K(map_size);
802 if (map_size > PAGE_SIZE_4K) {
803 iounmap(iommu->reg);
804 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
805 if (!iommu->reg) {
806 printk(KERN_ERR "IOMMU: can't map the region\n");
807 goto error;
808 }
809 }
810
811 ver = readl(iommu->reg + DMAR_VER_REG);
812 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
813 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
814 iommu->cap, iommu->ecap);
815 ret = iommu_init_domains(iommu);
816 if (ret)
817 goto error_unmap;
818 spin_lock_init(&iommu->lock);
819 spin_lock_init(&iommu->register_lock);
820
821 drhd->iommu = iommu;
822 return iommu;
823error_unmap:
824 iounmap(iommu->reg);
825 iommu->reg = 0;
826error:
827 kfree(iommu);
828 return NULL;
829}
830
831static void domain_exit(struct dmar_domain *domain);
/*
 * Release everything owned by @iommu: every domain whose id is still
 * set in the bitmap, translation state, the fault interrupt, domain
 * bookkeeping, context tables and the register mapping.
 */
static void free_iommu(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	int i;

	if (!iommu)
		return;

	/* tear down all domains still allocated on this unit */
	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
	for (; i < cap_ndoms(iommu->cap); ) {
		domain = iommu->domains[i];
		clear_bit(i, iommu->domain_ids);
		domain_exit(domain);
		i = find_next_bit(iommu->domain_ids,
			cap_ndoms(iommu->cap), i+1);
	}

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	if (iommu->irq) {
		set_irq_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);
	}

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	/* free context mapping */
	free_context_table(iommu);

	if (iommu->reg)
		iounmap(iommu->reg);
	kfree(iommu);
}
869
870static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
871{
872 unsigned long num;
873 unsigned long ndomains;
874 struct dmar_domain *domain;
875 unsigned long flags;
876
877 domain = alloc_domain_mem();
878 if (!domain)
879 return NULL;
880
881 ndomains = cap_ndoms(iommu->cap);
882
883 spin_lock_irqsave(&iommu->lock, flags);
884 num = find_first_zero_bit(iommu->domain_ids, ndomains);
885 if (num >= ndomains) {
886 spin_unlock_irqrestore(&iommu->lock, flags);
887 free_domain_mem(domain);
888 printk(KERN_ERR "IOMMU: no free domain ids\n");
889 return NULL;
890 }
891
892 set_bit(num, iommu->domain_ids);
893 domain->id = num;
894 domain->iommu = iommu;
895 iommu->domains[num] = domain;
896 spin_unlock_irqrestore(&iommu->lock, flags);
897
898 return domain;
899}
900
/*
 * Release @domain's id back to its iommu's bitmap.
 * NOTE(review): iommu->domains[domain->id] is left pointing at the
 * domain being torn down — confirm no caller dereferences that slot
 * after this.
 */
static void iommu_free_domain(struct dmar_domain *domain)
{
	unsigned long flags;

	spin_lock_irqsave(&domain->iommu->lock, flags);
	clear_bit(domain->id, domain->iommu->domain_ids);
	spin_unlock_irqrestore(&domain->iommu->lock, flags);
}
909
910static struct iova_domain reserved_iova_list;
911
912static void dmar_init_reserved_ranges(void)
913{
914 struct pci_dev *pdev = NULL;
915 struct iova *iova;
916 int i;
917 u64 addr, size;
918
919 init_iova_domain(&reserved_iova_list);
920
921 /* IOAPIC ranges shouldn't be accessed by DMA */
922 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
923 IOVA_PFN(IOAPIC_RANGE_END));
924 if (!iova)
925 printk(KERN_ERR "Reserve IOAPIC range failed\n");
926
927 /* Reserve all PCI MMIO to avoid peer-to-peer access */
928 for_each_pci_dev(pdev) {
929 struct resource *r;
930
931 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
932 r = &pdev->resource[i];
933 if (!r->flags || !(r->flags & IORESOURCE_MEM))
934 continue;
935 addr = r->start;
936 addr &= PAGE_MASK_4K;
937 size = r->end - addr;
938 size = PAGE_ALIGN_4K(size);
939 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
940 IOVA_PFN(size + addr) - 1);
941 if (!iova)
942 printk(KERN_ERR "Reserve iova failed\n");
943 }
944 }
945
946}
947
/* seed a new domain's iova space with the globally reserved ranges
 * (IOAPIC window and PCI MMIO apertures) */
static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}
952
/*
 * Round a guest address width up to the nearest width the page table
 * hardware can express: 12 bits of page offset plus a whole number of
 * 9-bit levels, capped at 64 bits.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = (rem == 0) ? gaw : gaw + 9 - rem;

	return (agaw > 64) ? 64 : agaw;
}
966
/*
 * Initialize a freshly allocated domain: iova space, address width
 * negotiation against the hardware's supported AGAW values, and the
 * top level page directory.  Returns 0, -ENODEV when no suitable
 * AGAW exists, or -ENOMEM.
 */
static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad);
	spin_lock_init(&domain->mapping_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain->iommu;
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		/* SAGAW is a 5-bit field, hence the limit of 5 */
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
	return 0;
}
1003
1004static void domain_exit(struct dmar_domain *domain)
1005{
1006 u64 end;
1007
1008 /* Domain 0 is reserved, so dont process it */
1009 if (!domain)
1010 return;
1011
1012 domain_remove_dev_info(domain);
1013 /* destroy iovas */
1014 put_iova_domain(&domain->iovad);
1015 end = DOMAIN_MAX_ADDR(domain->gaw);
1016 end = end & (~PAGE_MASK_4K);
1017
1018 /* clear ptes */
1019 dma_pte_clear_range(domain, 0, end);
1020
1021 /* free page tables */
1022 dma_pte_free_pagetable(domain, 0, end);
1023
1024 iommu_free_domain(domain);
1025 free_domain_mem(domain);
1026}
1027
/*
 * Point the context entry for @bus/@devfn at @domain's page tables so
 * DMA from that device is translated through the domain.  Returns 0 on
 * success (including when a present mapping already exists), -ENOMEM
 * when the context table cannot be allocated.
 */
static int domain_context_mapping_one(struct dmar_domain *domain,
		u8 bus, u8 devfn)
{
	struct context_entry *context;
	struct intel_iommu *iommu = domain->iommu;
	unsigned long flags;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
	BUG_ON(!domain->pgd);
	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(*context)) {
		/* already mapped (e.g. a bridge shared by several devices) */
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	context_set_domain_id(*context, domain->id);
	context_set_address_width(*context, domain->agaw);
	context_set_address_root(*context, virt_to_phys(domain->pgd));
	context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(*context);
	context_set_present(*context);
	__iommu_flush_cache(iommu, context, sizeof(*context));

	/* it's a non-present to present mapping */
	if (iommu_flush_context_device(iommu, domain->id,
			(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
		iommu_flush_write_buffer(iommu);
	else
		iommu_flush_iotlb_dsi(iommu, 0, 0);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return 0;
}
1064
/*
 * Set up context mappings for @pdev and, when it sits behind a
 * PCIe-to-PCI bridge, for every bridge on the upstream path as well —
 * requests from behind a bridge can carry the bridge's source id, so
 * those ids must map to the same domain.
 */
static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pdev->bus->number,
		pdev->devfn);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	/* map every bridge between the device and the upstream bridge */
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain, parent->bus->number,
			parent->devfn);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->subordinate->number, 0);
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->bus->number, tmp->devfn);
}
1096
/*
 * Mirror of domain_context_mapping(): check whether @pdev and every
 * bridge on its upstream path already have present context entries.
 * Returns 0 as soon as any entry on the path is missing, non-zero
 * when all are mapped.
 */
static int domain_context_mapped(struct dmar_domain *domain,
	struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = device_context_mapped(domain->iommu,
		pdev->bus->number, pdev->devfn);
	if (!ret)
		return ret;
	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return ret;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(domain->iommu, parent->bus->number,
			parent->devfn);
		if (!ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie)
		return device_context_mapped(domain->iommu,
			tmp->subordinate->number, 0);
	else
		return device_context_mapped(domain->iommu,
			tmp->bus->number, tmp->devfn);
}
1127
1128static int
1129domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1130 u64 hpa, size_t size, int prot)
1131{
1132 u64 start_pfn, end_pfn;
1133 struct dma_pte *pte;
1134 int index;
1135
1136 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1137 return -EINVAL;
1138 iova &= PAGE_MASK_4K;
1139 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1140 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1141 index = 0;
1142 while (start_pfn < end_pfn) {
1143 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1144 if (!pte)
1145 return -ENOMEM;
1146 /* We don't need lock here, nobody else
1147 * touches the iova range
1148 */
1149 BUG_ON(dma_pte_addr(*pte));
1150 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1151 dma_set_pte_prot(*pte, prot);
1152 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1153 start_pfn++;
1154 index++;
1155 }
1156 return 0;
1157}
1158
1159static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1160{
1161 clear_context_table(domain->iommu, bus, devfn);
1162 iommu_flush_context_global(domain->iommu, 0);
1163 iommu_flush_iotlb_global(domain->iommu, 0);
1164}
1165
/*
 * Detach every device from @domain: unlink each device_domain_info from
 * the domain's list and the global list, clear the cached pointer in
 * pdev->sysdata, tear down the device's context entry and free the info.
 */
static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->sysdata = NULL;
		/*
		 * Drop the lock before touching hardware:
		 * detach_domain_for_dev() issues flushes and must not run
		 * under device_domain_lock.
		 */
		spin_unlock_irqrestore(&device_domain_lock, flags);

		detach_domain_for_dev(info->domain, info->bus, info->devfn);
		free_devinfo_mem(info);

		/* re-take the lock before re-examining the list head */
		spin_lock_irqsave(&device_domain_lock, flags);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}
1188
1189/*
1190 * find_domain
1191 * Note: we use struct pci_dev->sysdata stores the info
1192 */
1193struct dmar_domain *
1194find_domain(struct pci_dev *pdev)
1195{
1196 struct device_domain_info *info;
1197
1198 /* No lock here, assumes no domain exit in normal case */
1199 info = pdev->sysdata;
1200 if (info)
1201 return info->domain;
1202 return NULL;
1203}
1204
1205static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1206 struct pci_dev *dev)
1207{
1208 int index;
1209
1210 while (dev) {
1211 for (index = 0; index < cnt; index ++)
1212 if (dev == devices[index])
1213 return 1;
1214
1215 /* Check our parent */
1216 dev = dev->bus->self;
1217 }
1218
1219 return 0;
1220}
1221
1222static struct dmar_drhd_unit *
1223dmar_find_matched_drhd_unit(struct pci_dev *dev)
1224{
1225 struct dmar_drhd_unit *drhd = NULL;
1226
1227 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1228 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1229 drhd->devices_cnt, dev))
1230 return drhd;
1231 }
1232
1233 return NULL;
1234}
1235
/*
 * get_domain_for_dev - find or create an initialized dmar_domain for @pdev.
 *
 * Devices behind a PCIE-to-PCI bridge share one domain (the bridge
 * presents a single requester id downstream), so the bridge's
 * (bus, devfn) is used as the lookup key for such devices.  @gaw is the
 * guest address width used when a new domain must be initialized.
 * The returned domain is initialized; NULL is returned only on failure.
 */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;

	/* fast path: domain already cached in pdev->sysdata */
	domain = find_domain(pdev);
	if (domain)
		return domain;

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		/* lookup key: secondary bus for PCIE bridge, own id otherwise */
		if (dev_tmp->is_pcie) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, uses it */
		if (found) {
			domain = found;
			goto found_domain;
		}
	}

	/* Allocate new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
			pci_name(pdev));
		return NULL;
	}
	iommu = drhd->iommu;

	domain = iommu_alloc_domain(iommu);
	if (!domain)
		goto error;

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* register pcie-to-pci device */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;

		/*
		 * Re-check under the lock: another thread may have
		 * registered a domain for this bridge since we dropped it.
		 */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			/* lost the race: discard ours, adopt the winner's */
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
		} else {
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	/* cache the info so future find_domain() calls are O(1) */
	pdev->sysdata = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* recheck it here, maybe others set it */
	return find_domain(pdev);
}
1356
1357static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1358{
1359 struct dmar_domain *domain;
1360 unsigned long size;
1361 u64 base;
1362 int ret;
1363
1364 printk(KERN_INFO
1365 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1366 pci_name(pdev), start, end);
1367 /* page table init */
1368 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1369 if (!domain)
1370 return -ENOMEM;
1371
1372 /* The address might not be aligned */
1373 base = start & PAGE_MASK_4K;
1374 size = end - base;
1375 size = PAGE_ALIGN_4K(size);
1376 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1377 IOVA_PFN(base + size) - 1)) {
1378 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1379 ret = -ENOMEM;
1380 goto error;
1381 }
1382
1383 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1384 size, base, pci_name(pdev));
1385 /*
1386 * RMRR range might have overlap with physical memory range,
1387 * clear it first
1388 */
1389 dma_pte_clear_range(domain, base, base + size);
1390
1391 ret = domain_page_mapping(domain, base, base, size,
1392 DMA_PTE_READ|DMA_PTE_WRITE);
1393 if (ret)
1394 goto error;
1395
1396 /* context entry init */
1397 ret = domain_context_mapping(domain, pdev);
1398 if (!ret)
1399 return 0;
1400error:
1401 domain_exit(domain);
1402 return ret;
1403
1404}
1405
1406static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1407 struct pci_dev *pdev)
1408{
1409 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1410 return 0;
1411 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1412 rmrr->end_address + 1);
1413}
1414
/*
 * init_dmars - bring up every DMA remapping unit.
 *
 * Three phases: allocate an intel_iommu and root table per DRHD unit,
 * identity-map all RMRR regions for their devices, then program the
 * root entries, flush caches and enable translation on each unit.
 * On failure all allocated iommus are freed and a negative errno is
 * returned.
 */
int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_rmrr_unit *rmrr;
	struct pci_dev *pdev;
	struct intel_iommu *iommu;
	int ret, unit = 0;

	/*
	 * for each drhd
	 *	allocate root
	 *	initialize and program root entry to not present
	 * endfor
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = alloc_iommu(drhd);
		if (!iommu) {
			ret = -ENOMEM;
			goto error;
		}

		/*
		 * TBD:
		 * we could share the same root & context tables
		 * amoung all IOMMU's. Need to Split it later.
		 */
		ret = iommu_alloc_root_entry(iommu);
		if (ret) {
			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
			goto error;
		}
	}

	/*
	 * For each rmrr
	 *   for each dev attached to rmrr
	 *   do
	 *     locate drhd for dev, alloc domain for dev
	 *     allocate free domain
	 *     allocate page table entries for rmrr
	 *     if context not allocated for bus
	 *           allocate and init context
	 *           set present in root table for this bus
	 *     init context with domain, translation etc
	 *    endfor
	 * endfor
	 */
	for_each_rmrr_units(rmrr) {
		int i;
		for (i = 0; i < rmrr->devices_cnt; i++) {
			pdev = rmrr->devices[i];
			/* some BIOS lists non-exist devices in DMAR table */
			if (!pdev)
				continue;
			/* failure here is logged but deliberately non-fatal */
			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
			if (ret)
				printk(KERN_ERR
				 "IOMMU: mapping reserved region failed\n");
		}
	}

	/*
	 * for each drhd
	 *   enable fault log
	 *   global invalidate context cache
	 *   global invalidate iotlb
	 *   enable translation
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		sprintf (iommu->name, "dmar%d", unit++);

		iommu_flush_write_buffer(iommu);

		iommu_set_root_entry(iommu);

		/* start from a clean slate before enabling translation */
		iommu_flush_context_global(iommu, 0);
		iommu_flush_iotlb_global(iommu, 0);

		ret = iommu_enable_translation(iommu);
		if (ret)
			goto error;
	}

	return 0;
error:
	/* unwind: free the iommu of every non-ignored unit */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		free_iommu(iommu);
	}
	return ret;
}
1513
1514static inline u64 aligned_size(u64 host_addr, size_t size)
1515{
1516 u64 addr;
1517 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1518 return PAGE_ALIGN_4K(addr);
1519}
1520
1521struct iova *
1522iommu_alloc_iova(struct dmar_domain *domain, void *host_addr, size_t size,
1523 u64 start, u64 end)
1524{
1525 u64 start_addr;
1526 struct iova *piova;
1527
1528 /* Make sure it's in range */
1529 if ((start > DOMAIN_MAX_ADDR(domain->gaw)) || end < start)
1530 return NULL;
1531
1532 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1533 start_addr = PAGE_ALIGN_4K(start);
1534 size = aligned_size((u64)host_addr, size);
1535 if (!size || (start_addr + size > end))
1536 return NULL;
1537
1538 piova = alloc_iova(&domain->iovad,
1539 size >> PAGE_SHIFT_4K, IOVA_PFN(end));
1540
1541 return piova;
1542}
1543
/*
 * Map @size bytes at kernel-virtual @addr for DMA by @dev.  Allocates
 * an iova (preferring the 32-bit range unless the device is 32-bit-only
 * or dmar_forcedac is set), ensures a context mapping exists and
 * installs the page-table entries.  On success returns the bus address
 * and fills *flush_addr/*flush_size with the range the caller must
 * IOTLB-flush; returns 0 on failure.
 */
static dma_addr_t __intel_map_single(struct device *dev, void *addr,
	size_t size, int dir, u64 *flush_addr, unsigned int *flush_size)
{
	struct dmar_domain *domain;
	struct pci_dev *pdev = to_pci_dev(dev);
	int ret;
	int prot = 0;
	struct iova *iova = NULL;
	u64 start_addr;

	/* work with the physical address from here on */
	addr = (void *)virt_to_phys(addr);

	domain = get_domain_for_dev(pdev,
		DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain) {
		printk(KERN_ERR
			"Allocating domain for %s failed", pci_name(pdev));
		return 0;
	}

	start_addr = IOVA_START_ADDR;

	if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
		iova = iommu_alloc_iova(domain, addr, size, start_addr,
			pdev->dma_mask);
	} else {
		/*
		 * First try to allocate an io virtual address in
		 * DMA_32BIT_MASK and if that fails then try allocating
		 * from higher range
		 */
		iova = iommu_alloc_iova(domain, addr, size, start_addr,
			DMA_32BIT_MASK);
		if (!iova)
			iova = iommu_alloc_iova(domain, addr, size, start_addr,
			pdev->dma_mask);
	}

	if (!iova) {
		printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
		return 0;
	}

	/* make sure context mapping is ok */
	if (unlikely(!domain_context_mapped(domain, pdev))) {
		ret = domain_context_mapping(domain, pdev);
		if (ret)
			goto error;
	}

	/*
	 * Check if DMAR supports zero-length reads on write only
	 * mappings..
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
			!cap_zlr(domain->iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;
	/*
	 * addr - (addr + size) might be partial page, we should map the whole
	 * page. Note: if two part of one page are separately mapped, we
	 * might have two guest_addr mapping to the same host addr, but this
	 * is not a big problem
	 */
	ret = domain_page_mapping(domain, iova->pfn_lo << PAGE_SHIFT_4K,
		((u64)addr) & PAGE_MASK_4K,
		(iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, prot);
	if (ret)
		goto error;

	pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
		pci_name(pdev), size, (u64)addr,
		(iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K,
		(u64)(iova->pfn_lo << PAGE_SHIFT_4K), dir);

	/* tell the caller which range to flush from the IOTLB */
	*flush_addr = iova->pfn_lo << PAGE_SHIFT_4K;
	*flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K;
	/* bus address = iova base + in-page offset of the buffer */
	return (iova->pfn_lo << PAGE_SHIFT_4K) + ((u64)addr & (~PAGE_MASK_4K));
error:
	__free_iova(&domain->iovad, iova);
	printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
		pci_name(pdev), size, (u64)addr, dir);
	return 0;
}
1629
1630static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1631 size_t size, int dir)
1632{
1633 struct pci_dev *pdev = to_pci_dev(hwdev);
1634 dma_addr_t ret;
1635 struct dmar_domain *domain;
1636 u64 flush_addr;
1637 unsigned int flush_size;
1638
1639 BUG_ON(dir == DMA_NONE);
1640 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1641 return virt_to_bus(addr);
1642
1643 ret = __intel_map_single(hwdev, addr, size,
1644 dir, &flush_addr, &flush_size);
1645 if (ret) {
1646 domain = find_domain(pdev);
1647 /* it's a non-present to present mapping */
1648 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
1649 flush_addr, flush_size >> PAGE_SHIFT_4K, 1))
1650 iommu_flush_write_buffer(domain->iommu);
1651 }
1652 return ret;
1653}
1654
/*
 * Tear down the mapping that covers @dev_addr.  The whole iova
 * allocation containing dev_addr is cleared (mappings are made at page
 * granularity).  On return *flush_addr/*flush_size describe the range
 * the caller must flush from the IOTLB; *flush_size is 0 when no
 * mapping was found for dev_addr.
 */
static void __intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
	size_t size, int dir, u64 *flush_addr, unsigned int *flush_size)
{
	struct dmar_domain *domain;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct iova *iova;

	domain = find_domain(pdev);
	/* unmap on a device that was never mapped is a caller bug */
	BUG_ON(!domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (!iova) {
		/* signal "nothing unmapped" to the caller */
		*flush_size = 0;
		return;
	}
	pr_debug("Device %s unmapping: %lx@%llx\n",
		pci_name(pdev),
		(iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K,
		(u64)(iova->pfn_lo << PAGE_SHIFT_4K));

	*flush_addr = iova->pfn_lo << PAGE_SHIFT_4K;
	*flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K;
	/* clear the whole page, not just dev_addr - (dev_addr + size) */
	dma_pte_clear_range(domain, *flush_addr, *flush_addr + *flush_size);
	/* free page tables */
	dma_pte_free_pagetable(domain, *flush_addr, *flush_addr + *flush_size);
	/* free iova */
	__free_iova(&domain->iovad, iova);
}
1684
1685static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1686 size_t size, int dir)
1687{
1688 struct pci_dev *pdev = to_pci_dev(dev);
1689 struct dmar_domain *domain;
1690 u64 flush_addr;
1691 unsigned int flush_size;
1692
1693 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1694 return;
1695
1696 domain = find_domain(pdev);
1697 __intel_unmap_single(dev, dev_addr, size,
1698 dir, &flush_addr, &flush_size);
1699 if (flush_size == 0)
1700 return;
1701 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, flush_addr,
1702 flush_size >> PAGE_SHIFT_4K, 0))
1703 iommu_flush_write_buffer(domain->iommu);
1704}
1705
1706static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1707 dma_addr_t *dma_handle, gfp_t flags)
1708{
1709 void *vaddr;
1710 int order;
1711
1712 size = PAGE_ALIGN_4K(size);
1713 order = get_order(size);
1714 flags &= ~(GFP_DMA | GFP_DMA32);
1715
1716 vaddr = (void *)__get_free_pages(flags, order);
1717 if (!vaddr)
1718 return NULL;
1719 memset(vaddr, 0, size);
1720
1721 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1722 if (*dma_handle)
1723 return vaddr;
1724 free_pages((unsigned long)vaddr, order);
1725 return NULL;
1726}
1727
1728static void intel_free_coherent(struct device *hwdev, size_t size,
1729 void *vaddr, dma_addr_t dma_handle)
1730{
1731 int order;
1732
1733 size = PAGE_ALIGN_4K(size);
1734 order = get_order(size);
1735
1736 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1737 free_pages((unsigned long)vaddr, order);
1738}
1739
1740static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sg,
1741 int nelems, int dir)
1742{
1743 int i;
1744 struct pci_dev *pdev = to_pci_dev(hwdev);
1745 struct dmar_domain *domain;
1746 u64 flush_addr;
1747 unsigned int flush_size;
1748
1749 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1750 return;
1751
1752 domain = find_domain(pdev);
1753 for (i = 0; i < nelems; i++, sg++)
1754 __intel_unmap_single(hwdev, sg->dma_address,
1755 sg->dma_length, dir, &flush_addr, &flush_size);
1756
1757 if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 0))
1758 iommu_flush_write_buffer(domain->iommu);
1759}
1760
1761#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
1762static int intel_nontranslate_map_sg(struct device *hddev,
1763 struct scatterlist *sg, int nelems, int dir)
1764{
1765 int i;
1766
1767 for (i = 0; i < nelems; i++) {
1768 struct scatterlist *s = &sg[i];
1769 BUG_ON(!s->page);
1770 s->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(s));
1771 s->dma_length = s->length;
1772 }
1773 return nelems;
1774}
1775
/*
 * dma_map_sg entry point: map every scatterlist element through
 * __intel_map_single().  On any element failure all previously mapped
 * elements are unmapped and 0 is returned; on success returns nelems
 * after a single domain-selective IOTLB flush.
 */
static int intel_map_sg(struct device *hwdev, struct scatterlist *sg,
	int nelems, int dir)
{
	void *addr;
	int i;
	dma_addr_t dma_handle;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	u64 flush_addr;
	unsigned int flush_size;

	BUG_ON(dir == DMA_NONE);
	/* untranslated device: 1:1 physical mapping */
	if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
		return intel_nontranslate_map_sg(hwdev, sg, nelems, dir);

	for (i = 0; i < nelems; i++, sg++) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		dma_handle = __intel_map_single(hwdev, addr,
			sg->length, dir, &flush_addr, &flush_size);
		if (!dma_handle) {
			/* sg was advanced i times, so sg - i is the head */
			intel_unmap_sg(hwdev, sg - i, i, dir);
			sg[0].dma_length = 0;
			return 0;
		}
		sg->dma_address = dma_handle;
		sg->dma_length = sg->length;
	}

	domain = find_domain(pdev);

	/* it's a non-present to present mapping */
	if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 1))
		iommu_flush_write_buffer(domain->iommu);
	return nelems;
}
1811
/*
 * DMA operations vector installed as the global dma_ops when the Intel
 * IOMMU is enabled (see intel_iommu_init()).
 */
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};
1820
1821static inline int iommu_domain_cache_init(void)
1822{
1823 int ret = 0;
1824
1825 iommu_domain_cache = kmem_cache_create("iommu_domain",
1826 sizeof(struct dmar_domain),
1827 0,
1828 SLAB_HWCACHE_ALIGN,
1829
1830 NULL);
1831 if (!iommu_domain_cache) {
1832 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
1833 ret = -ENOMEM;
1834 }
1835
1836 return ret;
1837}
1838
1839static inline int iommu_devinfo_cache_init(void)
1840{
1841 int ret = 0;
1842
1843 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
1844 sizeof(struct device_domain_info),
1845 0,
1846 SLAB_HWCACHE_ALIGN,
1847
1848 NULL);
1849 if (!iommu_devinfo_cache) {
1850 printk(KERN_ERR "Couldn't create devinfo cache\n");
1851 ret = -ENOMEM;
1852 }
1853
1854 return ret;
1855}
1856
1857static inline int iommu_iova_cache_init(void)
1858{
1859 int ret = 0;
1860
1861 iommu_iova_cache = kmem_cache_create("iommu_iova",
1862 sizeof(struct iova),
1863 0,
1864 SLAB_HWCACHE_ALIGN,
1865
1866 NULL);
1867 if (!iommu_iova_cache) {
1868 printk(KERN_ERR "Couldn't create iova cache\n");
1869 ret = -ENOMEM;
1870 }
1871
1872 return ret;
1873}
1874
1875static int __init iommu_init_mempool(void)
1876{
1877 int ret;
1878 ret = iommu_iova_cache_init();
1879 if (ret)
1880 return ret;
1881
1882 ret = iommu_domain_cache_init();
1883 if (ret)
1884 goto domain_error;
1885
1886 ret = iommu_devinfo_cache_init();
1887 if (!ret)
1888 return ret;
1889
1890 kmem_cache_destroy(iommu_domain_cache);
1891domain_error:
1892 kmem_cache_destroy(iommu_iova_cache);
1893
1894 return -ENOMEM;
1895}
1896
1897static void __init iommu_exit_mempool(void)
1898{
1899 kmem_cache_destroy(iommu_devinfo_cache);
1900 kmem_cache_destroy(iommu_domain_cache);
1901 kmem_cache_destroy(iommu_iova_cache);
1902
1903}
1904
1905void __init detect_intel_iommu(void)
1906{
1907 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
1908 return;
1909 if (early_dmar_detect()) {
1910 iommu_detected = 1;
1911 }
1912}
1913
/*
 * Mark DRHD units that should not be used for translation:
 *  - units whose device scope lists no device actually present, and
 *  - unless dmar_map_gfx is set, units covering only graphics devices;
 *    those devices get DUMMY_DEVICE_DOMAIN_INFO in sysdata so the DMA
 *    ops bypass translation for them.
 */
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			/* look for at least one present device in the scope */
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		/* does this unit cover anything besides gfx devices? */
		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
				!IS_GFX_DEVICE(drhd->devices[i]))
				break;

		/* a non-gfx device was found: keep translating this unit */
		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}
1954}
1955
1956int __init intel_iommu_init(void)
1957{
1958 int ret = 0;
1959
1960 if (no_iommu || swiotlb || dmar_disabled)
1961 return -ENODEV;
1962
1963 if (dmar_table_init())
1964 return -ENODEV;
1965
1966 iommu_init_mempool();
1967 dmar_init_reserved_ranges();
1968
1969 init_no_remapping_devices();
1970
1971 ret = init_dmars();
1972 if (ret) {
1973 printk(KERN_ERR "IOMMU: dmar init failed\n");
1974 put_iova_domain(&reserved_iova_list);
1975 iommu_exit_mempool();
1976 return ret;
1977 }
1978 printk(KERN_INFO
1979 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
1980
1981 force_iommu = 1;
1982 dma_ops = &intel_dma_ops;
1983 return 0;
1984}